From 3c8240486a884530130a33070566a35c10bcb8e1 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 16 Sep 2022 10:38:25 +0200 Subject: [PATCH 001/100] Add `podio-dump` tool to dump contents of podio files (#323) * Add print function to collections * Make the IReader interface capable of jumping to events * Split python parts into its own shared lib * Make it possible to read SIO files via python * Add podio-dump tool to inspect podio files Make sure print flushes the stream to ensure that python and c++ stdout are "synchronized" * Make podio-dump also dump event data --- CMakeLists.txt | 1 + include/podio/CollectionBase.h | 4 + include/podio/EventStore.h | 6 +- include/podio/GenericParameters.h | 3 + include/podio/IReader.h | 6 +- include/podio/PythonEventStore.h | 15 +++- include/podio/ROOTReader.h | 8 +- include/podio/SIOReader.h | 8 +- include/podio/TimedReader.h | 14 +++- include/podio/UserDataCollection.h | 22 ++++++ include/podio/utilities/IOHelpers.h | 17 ++++ python/EventStore.py | 10 ++- python/templates/Collection.h.jinja2 | 3 + python/templates/macros/collections.jinja2 | 7 ++ python/test_EventStore.py | 55 ++----------- python/test_EventStoreRoot.py | 50 ++++++++++++ python/test_EventStoreSio.py | 34 ++++++++ src/CMakeLists.txt | 60 ++++++++++---- src/GenericParameters.cc | 37 +++++++++ src/IOHelpers.cc | 39 ++++++++++ src/PythonEventStore.cc | 32 +++++--- src/ROOTReader.cc | 5 +- src/SIOBlock.cc | 2 +- src/SIOReader.cc | 39 ++++++---- src/python_selection.xml | 5 ++ src/selection.xml | 2 - tests/CMakeLists.txt | 3 +- tests/check_benchmark_outputs.cpp | 4 +- tests/read.cpp | 6 ++ tests/read_sio.cpp | 6 ++ tests/read_test.h | 11 +++ tools/CMakeLists.txt | 1 + tools/podio-dump | 91 ++++++++++++++++++++++ 33 files changed, 495 insertions(+), 111 deletions(-) create mode 100644 include/podio/utilities/IOHelpers.h create mode 100644 python/test_EventStoreRoot.py create mode 100644 python/test_EventStoreSio.py create mode 100644 src/IOHelpers.cc create 
mode 100644 src/python_selection.xml create mode 100644 tools/CMakeLists.txt create mode 100755 tools/podio-dump diff --git a/CMakeLists.txt b/CMakeLists.txt index ad9bb6331..fba80e7c3 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,6 +155,7 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE #--- project specific subdirectories ------------------------------------------- add_subdirectory(python) add_subdirectory(src) +add_subdirectory(tools) if(BUILD_TESTING) add_subdirectory(tests) endif() diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index 0633de673..fbd67ec99 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -4,6 +4,7 @@ #include "podio/CollectionBuffers.h" #include "podio/ObjectID.h" +#include #include #include #include @@ -69,6 +70,9 @@ class CollectionBase { /// declare this collection to be a subset collection virtual void setSubsetCollection(bool setSubset = true) = 0; + + /// print this collection to the passed stream + virtual void print(std::ostream& os = std::cout, bool flush = true) const = 0; }; } // namespace podio diff --git a/include/podio/EventStore.h b/include/podio/EventStore.h index a399916f0..e84b7e57e 100644 --- a/include/podio/EventStore.h +++ b/include/podio/EventStore.h @@ -112,8 +112,8 @@ class EventStore : public ICollectionProvider, public IMetaDataProvider { bool doGet(const std::string& name, CollectionBase*& collection, bool setReferences = true) const; /// check if a collection of given name already exists bool collectionRegistered(const std::string& name) const; - void setCollectionIDTable(CollectionIDTable* table) { - m_table.reset(table); + void setCollectionIDTable(std::shared_ptr table) { + m_table = std::move(table); } // members @@ -121,7 +121,7 @@ class EventStore : public ICollectionProvider, public IMetaDataProvider { mutable CollContainer m_collections{}; mutable std::vector m_cachedCollections{}; IReader* m_reader{nullptr}; - std::unique_ptr 
m_table; + std::shared_ptr m_table; GenericParameters m_evtMD{}; RunMDMap m_runMDMap{}; diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 5f4b0416b..08f2caac6 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -5,6 +5,7 @@ #include "podio/utilities/TypeHelpers.h" #include +#include #include #include #include @@ -177,6 +178,8 @@ class GenericParameters { _stringMap.clear(); } + void print(std::ostream& os = std::cout, bool flush = true); + /// Check if no parameter is stored (i.e. if all internal maps are empty) bool empty() const { return _intMap.empty() && _floatMap.empty() && _stringMap.empty(); diff --git a/include/podio/IReader.h b/include/podio/IReader.h index 96052de1c..12fcb6944 100644 --- a/include/podio/IReader.h +++ b/include/podio/IReader.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ class IReader { /// Does not set references yet. virtual CollectionBase* readCollection(const std::string& name) = 0; /// Get CollectionIDTable of read-in data - virtual CollectionIDTable* getCollectionIDTable() = 0; + virtual std::shared_ptr getCollectionIDTable() = 0; /// read event meta data from file virtual GenericParameters* readEventMetaData() = 0; virtual std::map* readCollectionMetaData() = 0; @@ -44,6 +45,9 @@ class IReader { virtual void openFile(const std::string& filename) = 0; virtual void closeFile() = 0; + virtual void readEvent() = 0; + virtual void goToEvent(unsigned iEvent) = 0; + /// Get the podio version with which the current file has been written virtual podio::version::Version currentFileVersion() const = 0; }; diff --git a/include/podio/PythonEventStore.h b/include/podio/PythonEventStore.h index 9279ed218..5ab68f15e 100644 --- a/include/podio/PythonEventStore.h +++ b/include/podio/PythonEventStore.h @@ -2,7 +2,10 @@ #define PODIO_PYTHONEVENTSTORE_H #include "podio/EventStore.h" -#include "podio/ROOTReader.h" +#include 
"podio/GenericParameters.h" +#include "podio/IReader.h" + +#include namespace podio { @@ -29,17 +32,21 @@ class PythonEventStore { } bool isValid() const { - return m_reader.isValid(); + return m_reader && m_reader->isValid(); } void close() { - m_reader.closeFiles(); + m_reader->closeFile(); } /// list available collections const std::vector& getCollectionNames() const; + const podio::GenericParameters& getEventMetaData() { + return m_store.getEventMetaData(); + } + private: - podio::ROOTReader m_reader; + std::unique_ptr m_reader; podio::EventStore m_store; /// set to true if input root file accessible, false otherwise diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 1db7d4202..09dfa646d 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -48,10 +48,10 @@ class ROOTReader : public IReader { void closeFiles(); /// Read all collections requested - void readEvent(); + void readEvent() override; /// Read CollectionIDTable from ROOT file - CollectionIDTable* getCollectionIDTable() override { + std::shared_ptr getCollectionIDTable() override { return m_table; } @@ -62,7 +62,7 @@ class ROOTReader : public IReader { void endOfEvent() override; /// Preparing to read a given event - void goToEvent(unsigned evnum); + void goToEvent(unsigned evnum) override; podio::version::Version currentFileVersion() const override { return m_fileVersion; @@ -104,7 +104,7 @@ class ROOTReader : public IReader { // collection after it has been read the very first time std::map m_storedClasses{}; - CollectionIDTable* m_table{nullptr}; + std::shared_ptr m_table{nullptr}; TChain* m_chain{nullptr}; unsigned m_eventNumber{0}; diff --git a/include/podio/SIOReader.h b/include/podio/SIOReader.h index 730fa1878..284dc13df 100644 --- a/include/podio/SIOReader.h +++ b/include/podio/SIOReader.h @@ -42,10 +42,12 @@ class SIOReader : public IReader { void closeFile() override; /// Read all collections requested - void readEvent(); + void readEvent() 
override; + + void goToEvent(unsigned iEvent) override; /// Read CollectionIDTable from SIO file - CollectionIDTable* getCollectionIDTable() override { + std::shared_ptr getCollectionIDTable() override { return m_table; } @@ -88,7 +90,7 @@ class SIOReader : public IReader { typedef std::pair Input; std::vector m_inputs{}; - CollectionIDTable* m_table{nullptr}; // will be owned by the EventStore + std::shared_ptr m_table{nullptr}; // Co-owned by the EventStore int m_eventNumber{0}; int m_lastEventRead{-1}; std::vector m_typeNames{}; diff --git a/include/podio/TimedReader.h b/include/podio/TimedReader.h index a37d28efb..288258da3 100644 --- a/include/podio/TimedReader.h +++ b/include/podio/TimedReader.h @@ -23,7 +23,8 @@ class TimedReader : public IReader { m_end(ClockT::now()), m_recorder(recorder), m_perEventTree(m_recorder.addTree( - "event_times", {"read_collections", "read_ev_md", "read_run_md", "read_coll_md", "end_of_event"})) { + "event_times", + {"read_collections", "read_ev_md", "read_run_md", "read_coll_md", "end_of_event", "read_event"})) { m_recorder.addTree("setup_times", {"constructor", "open_file", "close_file", "read_collection_ids", "get_entries"}); m_recorder.recordTime("setup_times", "constructor", m_end - m_start); } @@ -46,7 +47,7 @@ class TimedReader : public IReader { } /// Get CollectionIDTable of read-in data - CollectionIDTable* getCollectionIDTable() override { + std::shared_ptr getCollectionIDTable() override { return runTimed(false, "read_collection_ids", &IReader::getCollectionIDTable); } @@ -90,6 +91,15 @@ class TimedReader : public IReader { runVoidTimed(false, "close_file", &IReader::closeFile); } + void readEvent() override { + runVoidTimed(true, "read_event", &IReader::readEvent); + } + + void goToEvent(unsigned ev) override { + // TODO: Do we need to time this? 
Not really used at the moment + m_reader.goToEvent(ev); + } + podio::version::Version currentFileVersion() const override { // no need to time this as it is really just a very simple get return m_reader.currentFileVersion(); diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 5a34cac55..461004e80 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -142,6 +142,22 @@ class UserDataCollection : public CollectionBase { void setSubsetCollection(bool) override { } + /// Print this collection to the passed stream + void print(std::ostream& os = std::cout, bool flush = true) const override { + os << "["; + if (!_vec.empty()) { + os << _vec[0]; + for (size_t i = 0; i < _vec.size(); ++i) { + os << ", " << _vec[i]; + } + } + os << "]"; + + if (flush) { + os.flush(); // Necessary for python + } + } + // ----- some wrapers for std::vector and access to the complete std::vector (if really needed) typename std::vector::iterator begin() { @@ -185,6 +201,12 @@ class UserDataCollection : public CollectionBase { // don't make this macro public as it should only be used internally here... 
#undef PODIO_ADD_USER_TYPE +template > +std::ostream& operator<<(std::ostream& o, const podio::UserDataCollection& coll) { + coll.print(o); + return o; +} + } // namespace podio #endif diff --git a/include/podio/utilities/IOHelpers.h b/include/podio/utilities/IOHelpers.h new file mode 100644 index 000000000..b11ed76bc --- /dev/null +++ b/include/podio/utilities/IOHelpers.h @@ -0,0 +1,17 @@ +#ifndef PODIO_UTILITIES_IOHELPERS_H +#define PODIO_UTILITIES_IOHELPERS_H + +#ifndef PODIO_ENABLE_SIO + #define PODIO_ENABLE_SIO 0 +#endif + +#include "podio/IReader.h" + +#include +#include + +namespace podio { +std::unique_ptr createReader(const std::string& filename); +} + +#endif diff --git a/python/EventStore.py b/python/EventStore.py index 2bcd8480c..c1a2df3d4 100644 --- a/python/EventStore.py +++ b/python/EventStore.py @@ -2,7 +2,7 @@ from ROOT import gSystem -gSystem.Load("libpodioRootIO") # noqa: E402 +gSystem.Load("libpodioPythonStore") # noqa: E402 from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position @@ -73,6 +73,14 @@ def get(self, name): coll.__getitem__ = getitem return coll + def collections(self): + """Return list of all collection names.""" + return [str(c) for c in self.current_store.getCollectionNames()] + + def metadata(self): + """Get the metadata of the current event as GenericParameters""" + return self.current_store.getEventMetaData() + def isValid(self): """Check if the EventStore is in a valid state""" return self.current_store is not None and self.current_store.isValid() diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 93d667988..80b963ccb 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -58,6 +58,9 @@ public: void clear() final; + /// Print this collection to the passed stream + void print(std::ostream& os=std::cout, bool flush=true) const final; + /// operator to allow pointer like calling of members a la LCIO {{ class.bare_type 
}}Collection* operator->() { return ({{ class.bare_type }}Collection*) this; } diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index 255d2aa0f..bfe2e49a1 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -144,4 +144,11 @@ std::ostream& operator<<(std::ostream& o, const {{ class.bare_type }}Collection& o.flags(old_flags); return o; } + +void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const { + os << *this; + if (flush) { + os.flush(); + } +} {%- endmacro %} diff --git a/python/test_EventStore.py b/python/test_EventStore.py index 61d20f00a..3202f4843 100644 --- a/python/test_EventStore.py +++ b/python/test_EventStore.py @@ -1,20 +1,17 @@ """Unit tests for the EventStore class""" - -import os -import unittest from EventStore import EventStore -from ROOT import TFile - -class EventStoreTestCase(unittest.TestCase): - """EventStore unit tests""" - def setUp(self): - self.filename = str('example.root') - self.assertTrue(os.path.isfile(self.filename)) - self.store = EventStore([self.filename]) +class EventStoreBaseTestCaseMixin: + """EventStore unit tests + These define some tests that should work regardless of the backend that is + used. 
In order to not have to duplicate this functionality for each backend, + this base class defines the common tests and inheriting classes define a + corresponding setUp method that sets up the correct EventStore and potentially + additional backend specific functionality + """ def test_eventloop(self): self.assertTrue(len(self.store) >= 0) self.assertEqual(self.store.current_store.getEntries(), @@ -92,43 +89,7 @@ def test_hash(self): # testing that the hits stored as a one to many relation # import pdb; pdb.set_trace() - def test_chain(self): - self.store = EventStore([self.filename, - self.filename]) - rootfile = TFile(self.filename) - events = rootfile.Get(str('events')) - numbers = [] - for event in self.store: - evinfo = event.get("info") - numbers.append(evinfo[0].Number()) - - self.assertEqual(len(numbers), 2 * events.GetEntries()) - # testing that numbers is [0, .. 1999, 0, .. 1999] - self.assertEqual(numbers, list(range(events.GetEntries())) * 2) - # trying to go to an event beyond the last one - self.assertRaises(ValueError, self.store.__getitem__, 4001) - # this is in the first event in the second file, - # so its event number should be 0. 
- self.assertEqual(self.store[2000].get("info")[0].Number(), 0) - def test_context_managers(self): with EventStore([self.filename]) as store: self.assertTrue(len(store) >= 0) self.assertTrue(store.isValid()) - - def test_no_file(self): - '''Test that non-accessible files are gracefully handled.''' - with self.assertRaises(ValueError): - self.store = EventStore('foo.root') - - -if __name__ == "__main__": - from ROOT import gSystem - from subprocess import call - gSystem.Load("libTestDataModel") - # creating example file for the tests - if not os.path.isfile('example.root'): - WRITE_CMD = f'{os.environ["PODIO"]}/tests/write' - print(WRITE_CMD) - call(WRITE_CMD) - unittest.main() diff --git a/python/test_EventStoreRoot.py b/python/test_EventStoreRoot.py new file mode 100644 index 000000000..4acf74950 --- /dev/null +++ b/python/test_EventStoreRoot.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +"""Python unit tests for the ROOT backend""" + +import unittest +import os + +from ROOT import TFile + +from EventStore import EventStore +from test_EventStore import EventStoreBaseTestCaseMixin + + +class EventStoreRootTestCase(EventStoreBaseTestCaseMixin, unittest.TestCase): + """Test cases for root input files""" + def setUp(self): + """Setup an EventStore reading from a ROOT file""" + self.filename = 'example.root' + self.assertTrue(os.path.isfile(self.filename)) + self.store = EventStore(['example.root']) + + def test_chain(self): + self.store = EventStore([self.filename, + self.filename]) + rootfile = TFile(self.filename) + events = rootfile.Get(str('events')) + numbers = [] + for iev, _ in enumerate(self.store): + evinfo = self.store.get("info") + numbers.append(evinfo[0].Number()) + self.assertEqual(iev + 1, 2 * events.GetEntries()) # pylint: disable=undefined-loop-variable + # testing that numbers is [0, .. 1999, 0, .. 
1999] + self.assertEqual(numbers, list(range(events.GetEntries())) * 2) + # trying to go to an event beyond the last one + self.assertRaises(ValueError, self.store.__getitem__, 4001) + # this is in the first event in the second file, + # so its event number should be 0. + self.assertEqual(self.store[2000].get("info")[0].Number(), 0) + + def test_no_file(self): + '''Test that non-accessible files are gracefully handled.''' + with self.assertRaises(ValueError): + self.store = EventStore('foo.root') + + +if __name__ == '__main__': + # NOTE: These tests are really not intended to be run directly as they depend + # on quite some environment setup as well as externally produced inputs. + # See the CMakeLists.txt file in the tests folder for the specifics of that + # environment and the inputs + unittest.main() diff --git a/python/test_EventStoreSio.py b/python/test_EventStoreSio.py new file mode 100644 index 000000000..409039d95 --- /dev/null +++ b/python/test_EventStoreSio.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Python unit tests for the SIO backend""" + +import unittest +import os + +from EventStore import EventStore +from test_EventStore import EventStoreBaseTestCaseMixin + + +SKIP_SIO_TESTS = os.environ.get('SKIP_SIO_TESTS', '1') == '1' + + +@unittest.skipIf(SKIP_SIO_TESTS, "no SIO support") +class EventStoreSioTestCase(EventStoreBaseTestCaseMixin, unittest.TestCase): + """Test cases for root input files""" + def setUp(self): + """setup an EventStore reading an SIO file""" + self.filename = 'example.sio' + self.assertTrue(os.path.isfile(self.filename)) + self.store = EventStore([self.filename]) + + def test_no_file(self): + '''Test that non-accessible files are gracefully handled.''' + with self.assertRaises(ValueError): + self.store = EventStore('foo.sio') + + +if __name__ == '__main__': + # NOTE: These tests are really not intended to be run directly as they depend + # on quite some environment setup as well as externally produced inputs. 
+ # See the CMakeLists.txt file in the tests folder for the specifics of that + # environment and the inputs + unittest.main() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1d6d005cf..9c4b5bd87 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,20 +1,29 @@ # This is needed for older ROOTs which do not understand # target usage requirements -file(GLOB sources *.cc) -LIST(APPEND sources ${CMAKE_CURRENT_SOURCE_DIR}/rootUtils.h) -SET(root_sources ${sources}) +SET(sources + CollectionIDTable.cc + GenericParameters.cc + ASCIIWriter.cc + EventStore.cc) -# --- Store the sources for sio into a separate list -if(ENABLE_SIO) - SET(sio_sources ${sources}) - LIST(FILTER sio_sources INCLUDE REGEX SIO.*) -endif() +SET(root_sources + rootUtils.h + ROOTWriter.cc + ROOTReader.cc +) -# Remove SIO and ROOT related things from the core library -LIST(FILTER sources EXCLUDE REGEX SIO.*) -LIST(FILTER sources EXCLUDE REGEX ROOT.*|PythonEventStore.*|root[a-z|A-z|0-9].* ) -LIST(FILTER root_sources INCLUDE REGEX ROOT.*|PythonEventStore.*|root[a-z|A-z|0-9].* ) +SET(sio_sources + SIOReader.cc + SIOWriter.cc + SIOBlockUserData.cc + SIOBlock.cc +) + +SET(python_sources + IOHelpers.cc + PythonEventStore.cc + ) # Main Library, no external dependencies add_library(podio SHARED ${sources}) @@ -47,7 +56,6 @@ SET(headers ${CMAKE_SOURCE_DIR}/include/podio/ICollectionProvider.h ${CMAKE_SOURCE_DIR}/include/podio/IReader.h ${CMAKE_SOURCE_DIR}/include/podio/ObjectID.h - ${CMAKE_SOURCE_DIR}/include/podio/PythonEventStore.h ${CMAKE_SOURCE_DIR}/include/podio/UserDataCollection.h ${CMAKE_SOURCE_DIR}/include/podio/podioVersion.h ) @@ -59,6 +67,24 @@ set_target_properties(podioDict-dictgen PROPERTIES EXCLUDE_FROM_ALL TRUE) target_sources(podioDict PRIVATE podioDict.cxx) +add_library(podioPythonStore SHARED ${python_sources}) +target_link_libraries(podioPythonStore podio podioRootIO) +LIST(APPEND INSTALL_LIBRARIES podioPythonStore) + +add_library(podioPythonStoreDict SHARED) 
+target_include_directories(podioPythonStoreDict PUBLIC + $ + $ +) +target_link_libraries(podioPythonStoreDict PUBLIC podioPythonStore) +SET(python_headers + ${CMAKE_SOURCE_DIR}/include/podio/PythonEventStore.h +) +PODIO_GENERATE_DICTIONARY(podioPythonStoreDict ${python_headers} SELECTION python_selection.xml + OPTIONS --library ${CMAKE_SHARED_LIBRARY_PREFIX}podioPythonStoreDict${CMAKE_SHARED_MODULE_SUFFIX}) +set_target_properties(podioPythonStoreDict-dictgen PROPERTIES EXCLUDE_FROM_ALL TRUE) +target_sources(podioPythonStoreDict PRIVATE podioPythonStoreDict.cxx) + # SIO I/O library if(ENABLE_SIO) add_library(podioSioIO SHARED ${sio_sources}) @@ -69,10 +95,14 @@ if(ENABLE_SIO) $) target_link_libraries(podioSioIO PUBLIC podio::podio SIO::sio ${CMAKE_DL_LIBS} ${PODIO_FS_LIBS}) + # also make the python EventStore understand SIO + target_link_libraries(podioPythonStore podioSioIO) + target_compile_definitions(podioPythonStore PRIVATE PODIO_ENABLE_SIO=1) + LIST(APPEND INSTALL_LIBRARIES podioSioIO) endif() -install(TARGETS podio podioDict podioRootIO ${INSTALL_LIBRARIES} +install(TARGETS podio podioDict podioPythonStoreDict podioRootIO ${INSTALL_LIBRARIES} EXPORT podioTargets DESTINATION "${CMAKE_INSTALL_LIBDIR}") @@ -80,4 +110,6 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTA install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap ${CMAKE_CURRENT_BINARY_DIR}/libpodioDict_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/podioPythonStoreDictDict.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/libpodioPythonStoreDict_rdict.pcm DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/src/GenericParameters.cc b/src/GenericParameters.cc index 4f7685c2b..654f4dd61 100644 --- a/src/GenericParameters.cc +++ b/src/GenericParameters.cc @@ -1,6 +1,7 @@ #include "podio/GenericParameters.h" #include +#include namespace podio { @@ -82,4 +83,40 @@ void GenericParameters::setValues(const std::string& key, const StringVec& value setValue(key, values); } +template 
+std::ostream& operator<<(std::ostream& os, const std::vector& values) { + os << "["; + if (!values.empty()) { + os << values[0]; + for (size_t i = 1; i < values.size(); ++i) { + os << ", " << values[i]; + } + } + + return os << "]"; +} + +template +void printMap(const MapType& map, std::ostream& os) { + os << std::left << std::setw(30) << "Key " + << "Value " << '\n'; + os << "--------------------------------------------------------------------------------\n"; + for (const auto& [key, value] : map) { + os << std::left << std::setw(30) << key << value << '\n'; + } +} + +void GenericParameters::print(std::ostream& os, bool flush) { + os << "int parameters\n\n"; + printMap(getMap(), os); + os << "\nfloat parameters\n"; + printMap(getMap(), os); + os << "\nstd::string parameters\n"; + printMap(getMap(), os); + + if (flush) { + os.flush(); + } +} + } // namespace podio diff --git a/src/IOHelpers.cc b/src/IOHelpers.cc new file mode 100644 index 000000000..7dd764083 --- /dev/null +++ b/src/IOHelpers.cc @@ -0,0 +1,39 @@ +#include "podio/utilities/IOHelpers.h" + +#include "podio/ROOTReader.h" + +#if PODIO_ENABLE_SIO + #include "podio/SIOReader.h" +#endif + +namespace podio { +std::unique_ptr createReader(const std::string& filename) { + const auto fileEnding = [&filename]() -> std::string { + const auto n = filename.rfind('.'); + if (n != std::string::npos) { + return filename.substr(n); + } + return ""; + }(); + + if (fileEnding.empty()) { + return nullptr; + } + + if (fileEnding == ".root") { + return std::make_unique(); + } else if (fileEnding == ".sio") { +#if PODIO_ENABLE_SIO + return std::make_unique(); +#else + std::cerr << "PODIO: You are trying to open a .sio file but podio has not been built with SIO support\nMake sure " + "to build PODIO with SIO support to be able to read .sio files" + << std::endl; + return nullptr; +#endif + } else { + return nullptr; + } +} + +} // namespace podio diff --git a/src/PythonEventStore.cc b/src/PythonEventStore.cc index 
53499ed62..979bfb261 100644 --- a/src/PythonEventStore.cc +++ b/src/PythonEventStore.cc @@ -1,21 +1,31 @@ #include "podio/PythonEventStore.h" +#include "podio/ROOTReader.h" +#include "podio/utilities/IOHelpers.h" + #include #include #include -podio::PythonEventStore::PythonEventStore(const char* name) : m_reader(), m_store() { +podio::PythonEventStore::PythonEventStore(const char* name) : m_reader(podio::createReader(name)), m_store() { std::ifstream inputfile(name); m_isZombie = inputfile.good() ? false : true; - // the file could be a remote file that we cannot access but root - // knows how to handle via the xrootd protocol. - // if that is the case we ignore m_isZombie. - if (!std::string("root:/").compare(0, 6, name, 6)) { - m_isZombie = false; + + if (m_reader) { + if (m_isZombie && dynamic_cast(m_reader.get())) { + // the file could be a remote file that we cannot access but root + // knows how to handle via the xrootd protocol. + // if that is the case we ignore m_isZombie. + if (!std::string("root:/").compare(0, 6, name, 6)) { + m_isZombie = false; + } + } } + if (!m_isZombie) { - m_reader.openFiles({std::string(name)}); - m_store.setReader(&m_reader); + // at this point we have a combination of file and reader that should work + m_reader->openFile(name); + m_store.setReader(m_reader.get()); } } @@ -27,16 +37,16 @@ const podio::CollectionBase* podio::PythonEventStore::get(const char* name) { void podio::PythonEventStore::endOfEvent() { m_store.clear(); - m_reader.endOfEvent(); + m_reader->endOfEvent(); } void podio::PythonEventStore::goToEvent(unsigned ievent) { m_store.clear(); - m_reader.goToEvent(ievent); + m_reader->goToEvent(ievent); } unsigned podio::PythonEventStore::getEntries() const { - return m_reader.getEntries(); + return m_reader->getEntries(); } const std::vector& podio::PythonEventStore::getCollectionNames() const { diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 2ce5e4960..d5ae89775 100644 --- a/src/ROOTReader.cc +++ 
b/src/ROOTReader.cc @@ -150,8 +150,9 @@ void ROOTReader::openFiles(const std::vector& filenames) { // NOTE: This is a small pessimization, if we do not read all collections // afterwards, but it makes the handling much easier in general auto metadatatree = static_cast(m_chain->GetFile()->Get("metadata")); - m_table = new CollectionIDTable(); - metadatatree->SetBranchAddress("CollectionIDs", &m_table); + m_table = std::make_shared(); + auto* table = m_table.get(); + metadatatree->SetBranchAddress("CollectionIDs", &table); podio::version::Version* versionPtr{nullptr}; if (auto* versionBranch = root_utils::getBranch(metadatatree, "PodioVersion")) { diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index f2da1f6b3..db2b777af 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -14,7 +14,7 @@ namespace podio { SIOCollectionIDTableBlock::SIOCollectionIDTableBlock(podio::EventStore* store) : sio::block("CollectionIDs", sio::version::encode_version(0, 3)) { - const auto* table = store->getCollectionIDTable(); + const auto table = store->getCollectionIDTable(); _names = table->names(); _ids = table->ids(); _types.reserve(_names.size()); diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 59d8f427e..39d43cb32 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -37,22 +37,16 @@ CollectionBase* SIOReader::readCollection(const std::string& name) { } std::map* SIOReader::readCollectionMetaData() { - // Only read the data if it hasn't been read already - if (!m_collectionMetaData->data) { - m_collectionMetaData->data = new ColMDMap(); - readMetaDataRecord(m_collectionMetaData); - } - + // Always read a new map, because the EventStore takes ownership + m_collectionMetaData->data = new ColMDMap(); + readMetaDataRecord(m_collectionMetaData); return m_collectionMetaData->data; } std::map* SIOReader::readRunMetaData() { - // Only read the data if it hasn't been read already - if (!m_runMetaData->data) { - m_runMetaData->data = new RunMDMap(); - readMetaDataRecord(m_runMetaData); 
- } - + // Always read a new map, because the EventStore takes ownership + m_runMetaData->data = new RunMDMap(); + readMetaDataRecord(m_runMetaData); return m_runMetaData->data; } @@ -110,6 +104,24 @@ void SIOReader::endOfEvent() { m_inputs.clear(); } +void SIOReader::goToEvent(unsigned eventNumber) { + // If we are already past the desired event number, rewind to the start first + if (eventNumber < (unsigned)m_eventNumber) { + m_stream.clear(); + m_stream.seekg(0); + m_eventNumber = 0; + } + + sio::api::go_to_record(m_stream, "event_record"); + if ((eventNumber - m_eventNumber) > 0) { + sio::api::skip_n_records(m_stream, eventNumber - m_eventNumber); + } + m_eventNumber = eventNumber; + + m_inputs.clear(); + m_blocks.clear(); +} + void SIOReader::createBlocks() { // make sure that the first block is EventMetaData as it is also the first // during wrting @@ -139,7 +151,8 @@ void SIOReader::readCollectionIDTable() { sio::api::read_blocks(m_unc_buffer.span(), blocks); auto* idTableBlock = static_cast(blocks[0].get()); - m_table = idTableBlock->getTable(); + m_table = std::make_shared(); + m_table.reset(idTableBlock->getTable()); m_typeNames = idTableBlock->getTypeNames(); m_subsetCollectionBits = idTableBlock->getSubsetCollectionBits(); m_fileVersion = static_cast(blocks[1].get())->version; diff --git a/src/python_selection.xml b/src/python_selection.xml new file mode 100644 index 000000000..61b39b94c --- /dev/null +++ b/src/python_selection.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/src/selection.xml b/src/selection.xml index 3e0e1857e..1777b2d3d 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -13,8 +13,6 @@ - - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7f8f5faf8..cde097bcc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -150,7 +150,8 @@ set_property(TEST pyunittest PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} 
PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} - ROOT_INCLUDE_PATH= + ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel + SKIP_SIO_TESTS=$> ) set_property(TEST pyunittest PROPERTY DEPENDS write) diff --git a/tests/check_benchmark_outputs.cpp b/tests/check_benchmark_outputs.cpp index e3a98eac7..eb268f9d4 100644 --- a/tests/check_benchmark_outputs.cpp +++ b/tests/check_benchmark_outputs.cpp @@ -72,8 +72,8 @@ int main(int, char* argv[]) { const StringVec readBMSetupBranches = {"constructor", "open_file", "close_file", "get_entries", "read_collection_ids"}; - const StringVec readBMEventBranches = {"read_collections", "read_ev_md", "read_run_md", "read_coll_md", - "end_of_event"}; + const StringVec readBMEventBranches = {"read_collections", "read_ev_md", "read_run_md", + "read_coll_md", "end_of_event", "read_event"}; verifyBMFile(argv[2], readBMSetupBranches, readBMEventBranches); return 0; diff --git a/tests/read.cpp b/tests/read.cpp index a12738379..066b817d6 100644 --- a/tests/read.cpp +++ b/tests/read.cpp @@ -10,6 +10,12 @@ int main() { run_read_test(reader); + // jump back and forth a bit + run_read_test_event(reader, 10); + run_read_test_event(reader, 150); + run_read_test_event(reader, 120); + run_read_test_event(reader, 0); + reader.closeFile(); return 0; } diff --git a/tests/read_sio.cpp b/tests/read_sio.cpp index a45830eed..d1fd8c8c9 100644 --- a/tests/read_sio.cpp +++ b/tests/read_sio.cpp @@ -11,6 +11,12 @@ int main() { run_read_test(reader); + // jump back and forth a bit + run_read_test_event(reader, 10); + run_read_test_event(reader, 150); + run_read_test_event(reader, 120); + run_read_test_event(reader, 0); + reader.closeFile(); return 0; } diff --git a/tests/read_test.h b/tests/read_test.h index d4ed8c9c9..9b12cff6a 100644 --- a/tests/read_test.h +++ b/tests/read_test.h @@ -418,4 +418,15 @@ void run_read_test(podio::IReader& reader) { } } +// Same as above but only for a specified event +void run_read_test_event(podio::IReader& reader, 
unsigned event) { + auto store = podio::EventStore(); + store.setReader(&reader); + + reader.goToEvent(event); + processEvent(store, event, reader.currentFileVersion()); + store.clear(); + reader.endOfEvent(); +} + #endif // PODIO_TESTS_READ_TEST_H diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 000000000..a3cfeb29e --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1 @@ +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/tools/podio-dump b/tools/podio-dump new file mode 100755 index 000000000..ac6c55f1c --- /dev/null +++ b/tools/podio-dump @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +"""podio-dump tool to dump contents of podio files""" + +from EventStore import EventStore + + +def dump_evt_overview(event, ievt): + """Print an overview table of the event contents of the given event""" + print('{:#^82}'.format(f' Event {ievt} ')) + print('{:<30} {:<40} {:<10}'.format('Name', 'Type', 'Size')) + print('-' * 82) + for name in event.collections(): + coll = event.get(name) + print(f'{name:<30} {coll.getValueTypeName():<40} {len(coll):<10}') + + +def dump_overview(store, events): + """Print an overview for all the desired events""" + for ievt in events: + event = store[ievt] + dump_evt_overview(event, ievt) + + +def dump_evt_detailed(event, ievt): + """Dump this event in all its glory""" + print() + print('{:#^82}'.format(f' Event {ievt} ')) + print() + + print('Parameters', flush=True) + event.metadata().print() + print(flush=True) + + for name in event.collections(): + print(name, flush=True) + event.get(name).print() + print(flush=True) + + +def dump_detailed(store, events): + """Dump the complete event contents for all desired events""" + for ievt in events: + event = store[ievt] + dump_evt_detailed(event, ievt) + print() + + +def main(args): + """Main""" + store = EventStore([args.inputfile]) + if args.detailed: + dump_detailed(store, args.event) + else: + dump_overview(store, 
args.event) + + +def parse_evt_range(evt_string): + """Parse which events to print""" + try: + return [int(evt_string)] + except ValueError: + pass + + try: + return [int(i) for i in evt_string.split(',')] + except ValueError: + pass + + try: + first, last = [int(i) for i in evt_string.split(':')] + return list(range(first, last + 1)) + except ValueError: + pass + + raise argparse.ArgumentTypeError(f'\'{evt_string}\' cannot be parsed into a list of events') + + +if __name__ == '__main__': + import argparse + # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here + parser = argparse.ArgumentParser(description='Dump contents of a podio file to stdout') + parser.add_argument('inputfile', help='Name of the file to dump content from') + parser.add_argument('-e', '--event', + help='Which event(s) to print. A single number, comma separated list of numbers' + ' or "first:last" for an inclusive range of events. Defaults to the first event.', + type=parse_evt_range, default=[0]) + parser.add_argument('-d', '--detailed', help='Dump the full event contents not just the collection info', + action='store_true', default=False) + + clargs = parser.parse_args() + main(clargs) From e1096884df7c42e44ed3c51640f2d1cab110bab5 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 16 Sep 2022 16:28:07 +0200 Subject: [PATCH 002/100] First version of Frame I/O (#287) --- doc/frame.md | 157 ++++++++ include/podio/CollectionBase.h | 5 +- include/podio/CollectionBuffers.h | 72 +++- include/podio/CollectionIDTable.h | 17 +- include/podio/Frame.h | 414 ++++++++++++++++++++++ include/podio/GenericParameters.h | 53 ++- include/podio/ROOTFrameData.h | 70 ++++ include/podio/ROOTFrameReader.h | 120 +++++++ include/podio/ROOTFrameWriter.h | 87 +++++ include/podio/ROOTReader.h | 2 +- include/podio/SIOBlock.h | 34 +- include/podio/SIOBlockUserData.h | 22 +- include/podio/SIOFrameData.h | 89 +++++ include/podio/SIOFrameReader.h | 63 ++++ include/podio/SIOFrameWriter.h 
| 41 +++ include/podio/UserDataCollection.h | 16 +- include/podio/utilities/TypeHelpers.h | 10 + python/templates/Collection.cc.jinja2 | 30 +- python/templates/Collection.h.jinja2 | 6 +- python/templates/CollectionData.cc.jinja2 | 19 +- python/templates/CollectionData.h.jinja2 | 11 +- python/templates/SIOBlock.cc.jinja2 | 52 ++- python/templates/SIOBlock.h.jinja2 | 17 +- python/templates/macros/sioblocks.jinja2 | 4 +- src/CMakeLists.txt | 5 + src/CollectionIDTable.cc | 10 +- src/GenericParameters.cc | 17 + src/ROOTFrameReader.cc | 275 ++++++++++++++ src/ROOTFrameWriter.cc | 139 ++++++++ src/ROOTReader.cc | 2 +- src/ROOTWriter.cc | 3 +- src/SIOBlock.cc | 16 +- src/SIOFrameData.cc | 98 +++++ src/SIOFrameReader.cc | 113 ++++++ src/SIOFrameWriter.cc | 136 +++++++ src/SIOReader.cc | 8 +- src/SIOWriter.cc | 1 + src/rootUtils.h | 59 ++- tests/CMakeLists.txt | 32 +- tests/CTestCustom.cmake | 5 + tests/frame.cpp | 361 +++++++++++++++++++ tests/read_frame.cpp | 7 + tests/read_frame.h | 58 +++ tests/read_frame_sio.cpp | 7 + tests/read_test.h | 102 ++++-- tests/unittest.cpp | 31 ++ tests/write_frame.h | 390 ++++++++++++++++++++ tests/write_frame_root.cpp | 8 + tests/write_frame_sio.cpp | 8 + 49 files changed, 3205 insertions(+), 97 deletions(-) create mode 100644 doc/frame.md create mode 100644 include/podio/Frame.h create mode 100644 include/podio/ROOTFrameData.h create mode 100644 include/podio/ROOTFrameReader.h create mode 100644 include/podio/ROOTFrameWriter.h create mode 100644 include/podio/SIOFrameData.h create mode 100644 include/podio/SIOFrameReader.h create mode 100644 include/podio/SIOFrameWriter.h create mode 100644 src/ROOTFrameReader.cc create mode 100644 src/ROOTFrameWriter.cc create mode 100644 src/SIOFrameData.cc create mode 100644 src/SIOFrameReader.cc create mode 100644 src/SIOFrameWriter.cc create mode 100644 tests/frame.cpp create mode 100644 tests/read_frame.cpp create mode 100644 tests/read_frame.h create mode 100644 tests/read_frame_sio.cpp create mode 
100644 tests/write_frame.h create mode 100644 tests/write_frame_root.cpp create mode 100644 tests/write_frame_sio.cpp diff --git a/doc/frame.md b/doc/frame.md new file mode 100644 index 000000000..cab0a9910 --- /dev/null +++ b/doc/frame.md @@ -0,0 +1,157 @@ +# The `Frame` concept +The `podio::Frame` is a general data container for collection data of podio generated EDMs. +Additionally, it offers the functionality to store some (limited) data outside of an EDM. +The basic idea of the `Frame` is to give users of podio the possibility to organize EDM data into logical units and to potentially build a hierarchy of different `Frame`s. +Common examples would be the organisation of data into *Events* and *Runs*. +However, it is important to note that podio really does not impose any meaning on any `Frame` and each `Frame` is essentially defined by its contents. + +## Basic functionality of a `Frame` +The main functionality of a `Frame` is to store and aggregate EDM collection data and it also offers the possibility to store some generic data alongside. +To ensure thread safety and const-correctness a `Frame` takes ownership of any data that is put into it and only gives read access to immutable data. +This is mandated by the interface for collection data (simplified here for better readability): +```cpp +struct Frame { +template<typename CollT> +const CollT& put(CollT&& coll, const std::string& name); + +void put(std::unique_ptr<podio::CollectionBase> coll, const std::string& name); + +template<typename CollT> +const CollT& get(const std::string& name) const; + +template<typename T> +void putParameter(const std::string& name, T value); + +template<typename T> +const T& getParameter(const std::string); +}; +``` +In this case there are two ways to get collection data into the `Frame` +1. By passing a concrete collection (of type `CollT`) into the `Frame` as an [`rvalue`](https://en.cppreference.com/w/cpp/language/value_category). 
There are two ways to achieve this, either by passing the return value of a function directly into `Frame::put` or by explicitly moving it in the call via `std::move` if you are using a named variable. +2. By passing a `std::unique_ptr` to a collection. Similar to the first case, this can either be the return value of a function call, or has to be done via `std::move` (as mandated by the `std::unique_ptr` interface). + +In both cases, if you pass in a named variable, you are left with a *moved-from object*, which is in a *valid but unspecified* state and must not be used afterwards. +Some compilers and static code analysis tools are able to detect the accidental usage of *moved-from* objects. + +For putting in parameters the basic principle is very similar, with the major difference being that for *trivial* types `getParameter` will actually return by value. + +For all use cases there is some `enable_if` machinery in place to ensure that only valid collections and valid parameter types can actually be used. +These checks also make sure that it is impossible to put in collections without handing over ownership to the `Frame`. + +### Usage examples for collection data +These are a few very basic usage examples that highlight the main functionality (and potential pitfalls). 
+ +#### Putting collection data into the `Frame` +In all of the following examples, the following basic setup is assumed: +```cpp +#include "podio/Frame.h" + +#include "edm4hep/MCParticleCollection.h" // just to have a concrete example + +// create an empty Frame +auto frame = podio::Frame(); +``` + +Assuming there is a function that creates an `MCParticleCollection`, putting the return value into the `Frame` is very simple: +```cpp +edm4hep::MCParticleCollection createMCParticles(); // implemented somewhere else + +// put the return value of a function into the Frame +frame.put(createMCParticles(), "particles"); + +// put the return value into the Frame but keep the const reference +auto& particles = frame.put(createMCParticles(), "moreParticles"); +``` + +If working with named variables it is necessary to use `std::move` to put collections into the `Frame`. +The code will fail to compile if a named variable is not moved. +Assuming the same `createMCParticles` function as above, this looks like the following: + +```cpp +auto coll = createMCParticles(); +// potentially still modify the collection + +// Need to use std::move now that the collection has a name +frame.put(std::move(coll), "particles"); + +// Keeping a const reference is also possible +// NOTE: We are explicitly using a new variable name here +auto coll2 = createMCParticles(); +auto& particles = frame.put(std::move(coll2), "MCParticles"); +``` +At this point only `particles` is in a valid and **defined** state. + +#### Getting collection (references) from the `Frame` +Obtaining immutable (`const`) references to collections stored in the `Frame` is trivial. +Here we are assuming that the collections are actually present in the `Frame`. +```cpp +auto& particles = frame.get<edm4hep::MCParticleCollection>("particles"); +``` + +### Usage for Parameters +Parameters use the `podio::GenericParameters` class behind the scenes. 
+Hence, the types that can be used are `int`, `float`, and `std::string` as well as `std::vector`s of those. +For better usability, some overloads for `putParameter` exist to allow for an *in-place* construction, e.g. +```cpp +// Passing in a const char* for a std::string +frame.putParameter("aString", "a string value"); + +// Creating a vector of ints on the fly +frame.putParameter("ints", {1, 2, 3, 4}); +``` + +## I/O basics and philosophy +podio offers all the necessary functionality to read and write `Frame`s. +However, it is not in the scope of podio to organize them into a hierarchy, nor +to maintain such a hierarchy. When writing data to file `Frame`s are written to +the file in the order they are passed to the writer. For reading them back podio +offers random access to stored `Frame`s, which should make it possible to +restore any hierarchy again. The Writers and Readers of podio are supposed to be +run on and accessed by only one single thread. + +### Writing a `Frame` +For writing a `Frame` the writers can ask each `Frame` for `CollectionWriteBuffers` for each collection that should be written. +In these buffers the underlying data is still owned by the collection, and by extension the `Frame`. +This makes it possible to write the same collection with several different writers. +Writers can access a `Frame` from several different threads, even though each writer is assumed to be on only one thread. +For writing the `GenericParameters` that are stored in the `Frame` and for other necessary data, similar access functionality is offered by the `Frame`. + +### Reading a `Frame` +When reading a `Frame` readers do not have to return a complete `Frame`. +Instead they return a more or less arbitrary type of `FrameData` that simply has to provide the following public interface. 
+```cpp +struct FrameData { + /// Get a copy of the internal collection id table + podio::CollectionIDTable getIDTable() const; + + /// Get the buffers to construct the collection with the given name + std::optional<podio::CollectionReadBuffers> getCollectionBuffers(const std::string& name); + + /// Get the still available, i.e. not yet unpacked, collections from the raw data + std::vector<std::string> getAvailableCollections() const; + + /// Get the parameters that are stored in the raw data + std::unique_ptr<podio::GenericParameters> getParameters(); +}; +``` +A `Frame` is constructed with a (`unique_ptr` of such) `FrameData` and handles everything from there. +Note that the `FrameData` type of any I/O backend is a free type without inheritance as the `Frame` constructor is templated on this. +This splitting of reading data from file and constructing a `Frame` from it later has some advantages: +- Since podio assumes that reading is done single-threaded, the amount of time that is actually spent in a reader is minimized, as only the file operations need to be done on a single thread. All further processing (potential decompression, unpacking, etc.) can be done on a different thread where the `Frame` is actually constructed. +- It gives different backends the necessary freedom to exploit different optimization strategies and does not force them to conform to an implementation that is potentially detrimental to performance. +- It also makes it possible to pass around data from which a `Frame` can be constructed without having to actually construct one. +- Readers do not have to know how to construct collections from the buffers, as they are only required to provide the buffers themselves. + +### Schema evolution +Schema evolution happens on the `CollectionReadBuffers` when they are requested from the `FrameData` inside the `Frame`. +It is possible for the I/O backend to handle schema evolution before the `Frame` sees the buffers for the first time. +In that case podio schema evolution becomes a simple check. 
+ +# Frame implementation and design +One of the main concerns of the `Frame` is to offer one common, non-templated, interface while still supporting different I/O backends and potentially different *policies*. +The "classic" approach would be to have an abstract `IFrame` interface with several implementations that offer the desired functionality (and their small differences). +One problem with that approach is that a purely abstract interface cannot have templated member functions. Hence, the desired type-safe behavior of `get` and `put` would be very hard to implement. +Additionally, policies ideally affect orthogonal aspects of the `Frame` behavior. +Implementing all possible combinations of behaviors through implementations of an abstract interface would lead to quite a bit of code duplication and cannot take advantage of the factorization of the problem. +To solve these problems, we chose to implement the `Frame` via [*Type Erasure*](https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Type_Erasure). +This has the additional advantage that the `Frame` has *value semantics*, in line with the design of podio. 
diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index fbd67ec99..670291ba3 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -44,7 +44,10 @@ class CollectionBase { virtual unsigned getID() const = 0; /// Get the collection buffers for this collection - virtual podio::CollectionBuffers getBuffers() = 0; + virtual podio::CollectionWriteBuffers getBuffers() = 0; + + /// Create (empty) collection buffers from which a collection can be constructed + virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0; /// check for validity of the container after read virtual bool isValid() const = 0; diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 87d2dce71..d69ff0288 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -3,6 +3,7 @@ #include "podio/ObjectID.h" +#include #include #include #include @@ -10,6 +11,8 @@ namespace podio { +class CollectionBase; + template using UVecPtr = std::unique_ptr>; @@ -20,16 +23,81 @@ using VectorMembersInfo = std::vector>; * Simple helper struct that bundles all the potentially necessary buffers that * are necessary to represent a collection for I/O purposes. */ -struct CollectionBuffers { +struct CollectionWriteBuffers { void* data{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; template std::vector* dataAsVector() { + return asVector(data); + } + + template + static std::vector* asVector(void* raw) { // Are we at a beach? I can almost smell the C... 
- return *static_cast**>(data); + return *static_cast**>(raw); + } +}; + +struct CollectionReadBuffers { + void* data{nullptr}; + CollRefCollection* references{nullptr}; + VectorMembersInfo* vectorMembers{nullptr}; + + using CreateFuncT = std::function(podio::CollectionReadBuffers, bool)>; + using RecastFuncT = std::function; + + CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, CreateFuncT&& createFunc, + RecastFuncT&& recastFunc) : + data(d), + references(ref), + vectorMembers(vec), + createCollection(std::move(createFunc)), + recast(std::move(recastFunc)) { } + + CollectionReadBuffers() = default; + CollectionReadBuffers(const CollectionReadBuffers&) = default; + CollectionReadBuffers& operator=(const CollectionReadBuffers&) = default; + + CollectionReadBuffers(CollectionWriteBuffers buffers) : + data(buffers.data), references(buffers.references), vectorMembers(buffers.vectorMembers) { + } + + template + std::vector* dataAsVector() { + return asVector(data); + } + + template + static std::vector* asVector(void* raw) { + // Are we at a beach? I can almost smell the C... + return static_cast*>(raw); + } + + CreateFuncT createCollection{}; + + // This is a hacky workaround for the ROOT backend at the moment. There is + // probably a better solution, but I haven't found it yet. 
The problem is the + // following: + // + // When creating a pointer to a vector, either via new or via + // TClass::New(), we get a void*, that can be cast back to a vector with + // + // static_cast*>(raw); + // + // However, as soon as we pass that same void* to TBranch::SetAddress this no + // longer works and the actual cast has to be + // + // *static_cast**>(raw); + // + // To make it possible to always use the first form, after we leave the Root + // parts of reading, this function is populated in the createBuffers call of each + // datatype where we have the necessary type information (from code + // generation) to do the second cast and assign the result of that to the data + // field again. + RecastFuncT recast{}; }; } // namespace podio diff --git a/include/podio/CollectionIDTable.h b/include/podio/CollectionIDTable.h index 5573b3e2c..c639a904c 100644 --- a/include/podio/CollectionIDTable.h +++ b/include/podio/CollectionIDTable.h @@ -1,6 +1,7 @@ #ifndef PODIO_COLLECTIONIDTABLE_H #define PODIO_COLLECTIONIDTABLE_H +#include #include #include #include @@ -12,11 +13,20 @@ class CollectionIDTable { public: /// default constructor CollectionIDTable() = default; + ~CollectionIDTable() = default; + + CollectionIDTable(const CollectionIDTable&) = delete; + CollectionIDTable& operator=(const CollectionIDTable&) = delete; + CollectionIDTable(CollectionIDTable&&) = default; + CollectionIDTable& operator=(CollectionIDTable&&) = default; /// constructor from existing ID:name mapping CollectionIDTable(std::vector&& ids, std::vector&& names) : m_collectionIDs(std::move(ids)), m_names(std::move(names)){}; + CollectionIDTable(const std::vector& ids, const std::vector& names) : + m_collectionIDs(ids), m_names(names){}; + /// return collection ID for given name int collectionID(const std::string& name) const; @@ -43,10 +53,15 @@ class CollectionIDTable { /// Prints collection information void print() const; + /// Does this table hold any information? 
+ bool empty() const { + return m_names.empty(); + } + private: std::vector m_collectionIDs{}; std::vector m_names{}; - mutable std::mutex m_mutex{}; + mutable std::unique_ptr m_mutex{std::make_unique()}; }; } // namespace podio diff --git a/include/podio/Frame.h b/include/podio/Frame.h new file mode 100644 index 000000000..557eddceb --- /dev/null +++ b/include/podio/Frame.h @@ -0,0 +1,414 @@ +#ifndef PODIO_FRAME_H +#define PODIO_FRAME_H + +#include "podio/CollectionBase.h" +#include "podio/CollectionIDTable.h" +#include "podio/GenericParameters.h" +#include "podio/ICollectionProvider.h" +#include "podio/utilities/TypeHelpers.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace podio { + +/// Alias template for enabling overloads only for Collections +template +using EnableIfCollection = typename std::enable_if_t>; + +/// Alias template for enabling overloads only for Collection r-values +template +using EnableIfCollectionRValue = typename std::enable_if_t && !std::is_lvalue_reference_v>; + +namespace detail { + /** The minimal interface for raw data types + */ + struct EmptyFrameData { + podio::CollectionIDTable getIDTable() const { + return {}; + } + + std::optional getCollectionBuffers(const std::string&) { + return std::nullopt; + } + + /** Get the still available, i.e. yet unpacked, collections from the raw data + */ + std::vector getAvailableCollections() const { + return {}; + } + + /** Get the parameters that are stored in the raw data + */ + std::unique_ptr getParameters() { + return std::make_unique(); + } + }; +} // namespace detail + +template +std::optional unpack(FrameDataT* data, const std::string& name) { + return data->getCollectionBuffers(name); +} + +/** + * Frame class that serves as a container of collection and meta data. 
+ */ +class Frame { + /** + * Internal abstract interface for the type-erased implementation of the Frame + * class + */ + struct FrameConcept { + virtual ~FrameConcept() = default; + virtual const podio::CollectionBase* get(const std::string& name) const = 0; + virtual const podio::CollectionBase* put(std::unique_ptr coll, const std::string& name) = 0; + virtual podio::GenericParameters& parameters() = 0; + virtual const podio::GenericParameters& parameters() const = 0; + + virtual std::vector availableCollections() const = 0; + + // Writing interface. Need this to be able to store all necessary information + // TODO: Figure out whether this can be "hidden" somehow + virtual podio::CollectionIDTable getIDTable() const = 0; + }; + + /** + * The interface implementation of the abstract FrameConcept that is necessary + * for a type-erased implementation of the Frame class + */ + template + struct FrameModel final : FrameConcept, public ICollectionProvider { + + FrameModel(std::unique_ptr data); + ~FrameModel() = default; + FrameModel(const FrameModel&) = delete; + FrameModel& operator=(const FrameModel&) = delete; + FrameModel(FrameModel&&) = default; + FrameModel& operator=(FrameModel&&) = default; + + /** Try and get the collection from the internal storage and return a + * pointer to it if found. Otherwise return a nullptr + */ + const podio::CollectionBase* get(const std::string& name) const final; + + /** Try and place the collection into the internal storage and return a + * pointer to it. 
If a collection already exists or insertion fails, return + * a nullptr + */ + const podio::CollectionBase* put(std::unique_ptr coll, const std::string& name) final; + + /** Get a reference to the internally used GenericParameters + */ + podio::GenericParameters& parameters() override { + return *m_parameters; + } + /** Get a const reference to the internally used GenericParameters + */ + const podio::GenericParameters& parameters() const override { + return *m_parameters; + }; + + bool get(int collectionID, podio::CollectionBase*& collection) const override; + + podio::CollectionIDTable getIDTable() const override { + // Make a copy + return {m_idTable.ids(), m_idTable.names()}; + } + + std::vector availableCollections() const override; + + private: + podio::CollectionBase* doGet(const std::string& name, bool setReferences = true) const; + + using CollectionMapT = std::unordered_map>; + + mutable CollectionMapT m_collections{}; ///< The internal map for storing unpacked collections + mutable std::unique_ptr m_mapMtx{nullptr}; ///< The mutex for guarding the internal collection map + std::unique_ptr m_data{nullptr}; ///< The raw data read from file + mutable std::unique_ptr m_dataMtx{nullptr}; ///< The mutex for guarding the raw data + podio::CollectionIDTable m_idTable{}; ///< The collection ID table + std::unique_ptr m_parameters{nullptr}; ///< The generic parameter store for this frame + mutable std::set m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet put + ///< into the map) + }; + + std::unique_ptr m_self; ///< The internal concept pointer through which all the work is done + +public: + /** Empty Frame constructor + */ + Frame(); + + /** Frame constructor from (almost) arbitrary raw data + */ + template + Frame(std::unique_ptr); + + // The frame is a non-copyable type + Frame(const Frame&) = delete; + Frame& operator=(const Frame&) = delete; + + Frame(Frame&&) = default; + Frame& operator=(Frame&&) = default; + + /** 
Frame destructor */ + ~Frame() = default; + + /** Get a collection from the Frame + */ + template > + const CollT& get(const std::string& name) const; + + /** (Destructively) move a collection into the Frame and get a const reference + * back for further use + */ + template > + const CollT& put(CollT&& coll, const std::string& name); + + /** Move a collection into the Frame handing over ownership to the Frame + */ + void put(std::unique_ptr coll, const std::string& name); + + /** Add a value to the parameters of the Frame (if the type is supported). + * Copy the value into the internal store + */ + template > + void putParameter(const std::string& key, T value) { + m_self->parameters().setValue(key, value); + } + + /** Add a string value to the parameters of the Frame by copying it. Dedicated + * overload for enabling the on-the-fly conversion on the string literals. + */ + void putParameter(const std::string& key, std::string value) { + putParameter(key, std::move(value)); + } + + /** Add a vector of strings to the parameters of the Frame (via copy). + * Dedicated overload for enabling on-the-fly conversions of initializer_list + * of string literals. + */ + void putParameter(const std::string& key, std::vector values) { + putParameter>(key, std::move(values)); + } + + /** Add a vector of values into the parameters of the Frame. Overload for + * catching on-the-fly conversions of initializer_lists of values. + */ + template >> + void putParameter(const std::string& key, std::initializer_list&& values) { + putParameter>(key, std::move(values)); + } + + /** Retrieve parameters via key from the internal store. Return type will + * either by a const reference or a value depending on the desired type. 
+ */ + template > + podio::GenericDataReturnType getParameter(const std::string& key) const { + return m_self->parameters().getValue(key); + } + + /** Get all **currently** available collections (including potentially + * unpacked ones from raw data) + */ + std::vector getAvailableCollections() const { + return m_self->availableCollections(); + } + + // Interfaces for writing below + // TODO: Hide this from the public interface somehow? + + /** + * Get the GenericParameters for writing + */ + const podio::GenericParameters& getGenericParametersForWrite() const { + return m_self->parameters(); + } + + /** + * Get a collection for writing (in a prepared and "ready-to-write" state) + */ + const podio::CollectionBase* getCollectionForWrite(const std::string& name) const { + const auto* coll = m_self->get(name); + if (coll) { + coll->prepareForWrite(); + } + + return coll; + } + + podio::CollectionIDTable getCollectionIDTableForWrite() const { + return m_self->getIDTable(); + } +}; + +// implementations below + +Frame::Frame() : Frame(std::make_unique()) { +} + +template +Frame::Frame(std::unique_ptr data) : m_self(std::make_unique>(std::move(data))) { +} + +template +const CollT& Frame::get(const std::string& name) const { + const auto* coll = dynamic_cast(m_self->get(name)); + if (coll) { + return *coll; + } + // TODO: Handle non-existing collections + static const auto emptyColl = CollT(); + return emptyColl; +} + +void Frame::put(std::unique_ptr coll, const std::string& name) { + const auto* retColl = m_self->put(std::move(coll), name); + if (!retColl) { + // TODO: Handle collisions + } +} + +template +const CollT& Frame::put(CollT&& coll, const std::string& name) { + const auto* retColl = static_cast(m_self->put(std::make_unique(std::move(coll)), name)); + if (retColl) { + return *retColl; + } + // TODO: Handle collision case + static const auto emptyColl = CollT(); + return emptyColl; +} + +template +Frame::FrameModel::FrameModel(std::unique_ptr data) : + 
m_mapMtx(std::make_unique()), + m_data(std::move(data)), + m_dataMtx(std::make_unique()), + m_idTable(std::move(m_data->getIDTable())), + m_parameters(std::move(m_data->getParameters())) { +} + +template +const podio::CollectionBase* Frame::FrameModel::get(const std::string& name) const { + return doGet(name); +} + +template +podio::CollectionBase* Frame::FrameModel::doGet(const std::string& name, bool setReferences) const { + { + // First check whether the collection is in the map already + // + // Collections only land here if they are fully unpacked, i.e. + // prepareAfterRead has been called or it has been put into the Frame + std::lock_guard lock{*m_mapMtx}; + if (const auto it = m_collections.find(name); it != m_collections.end()) { + return it->second.get(); + } + } + + podio::CollectionBase* retColl = nullptr; + + // Now try to get it from the raw data if we have the possibility + if (m_data) { + // Have the buffers in the outer scope here to hold the raw data lock as + // briefly as possible + auto buffers = std::optional{std::nullopt}; + { + std::lock_guard lock{*m_dataMtx}; + buffers = unpack(m_data.get(), name); + } + if (buffers) { + auto coll = buffers->createCollection(buffers.value(), buffers->data == nullptr); + coll->prepareAfterRead(); + coll->setID(m_idTable.collectionID(name)); + { + std::lock_guard mapLock{*m_mapMtx}; + auto [it, success] = m_collections.emplace(name, std::move(coll)); + // TODO: Check success? Or simply assume that everything is fine at this point? + // TODO: Collision handling? 
+ retColl = it->second.get(); + } + + if (setReferences) { + retColl->setReferences(this); + } + } + } + + return retColl; +} + +template +bool Frame::FrameModel::get(int collectionID, CollectionBase*& collection) const { + const auto& name = m_idTable.name(collectionID); + const auto& [_, inserted] = m_retrievedIDs.insert(collectionID); + + if (!inserted) { + auto coll = doGet(name); + if (coll) { + collection = coll; + return true; + } + } else { + auto coll = doGet(name, false); + if (coll) { + collection = coll; + return true; + } + } + + return false; +} + +template +const podio::CollectionBase* Frame::FrameModel::put(std::unique_ptr coll, + const std::string& name) { + { + std::lock_guard lock{*m_mapMtx}; + auto [it, success] = m_collections.try_emplace(name, std::move(coll)); + if (success) { + // TODO: Check whether this collection is already known to the idTable + // -> What to do on collision? + // -> Check before we emplace it into the internal map to prevent possible + // collisions from collections that are potentially present from rawdata? + it->second->setID(m_idTable.add(name)); + return it->second.get(); + } + } + + return nullptr; +} + +template +std::vector Frame::FrameModel::availableCollections() const { + // TODO: Check if there is a more efficient way to do this. Currently this is + // done very conservatively, but in a way that should always work, regardless + // of assumptions. 
It might be possible to simply return what is in the + // idTable here, because that should in principle encompass everything that is + // in the raw data as well as things that have been put into the frame + + // Lock both the internal map and the rawdata for this + std::scoped_lock lock{*m_mapMtx, *m_dataMtx}; + + auto collections = m_data->getAvailableCollections(); + collections.reserve(collections.size() + m_collections.size()); + + for (const auto& [name, _] : m_collections) { + collections.push_back(name); + } + + return collections; +} + +} // namespace podio + +#endif // PODIO_FRAME_H diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 08f2caac6..5da2c22ef 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include @@ -78,8 +80,24 @@ class GenericParameters { using IntMap = MapType; using FloatMap = MapType; using StringMap = MapType; + // need mutex pointers for having the possibility to copy/move GenericParameters + using MutexPtr = std::unique_ptr; public: + GenericParameters() = default; + + /// GenericParameters are copyable + /// NOTE: This is currently mainly done to keep the ROOT I/O happy, because + /// that needs a copy constructor + GenericParameters(const GenericParameters&); + GenericParameters& operator=(const GenericParameters&) = delete; + + /// GenericParameters are default moveable + GenericParameters(GenericParameters&&) = default; + GenericParameters& operator=(GenericParameters&&) = default; + + ~GenericParameters() = default; + /// Get the value that is stored under the given key, by const reference or by /// value depending on the desired type template > @@ -240,15 +258,32 @@ class GenericParameters { } } + /// Get the mutex that guards the map for the given type + template + std::mutex& getMutex() const { + if constexpr (std::is_same_v, int>) { + return *(m_intMtx.get()); + } else if 
constexpr (std::is_same_v, float>) { + return *(m_floatMtx.get()); + } else { + return *(m_stringMtx.get()); + } + } + private: - IntMap _intMap{}; - FloatMap _floatMap{}; - StringMap _stringMap{}; + IntMap _intMap{}; ///< The map storing the integer values + mutable MutexPtr m_intMtx{std::make_unique()}; ///< The mutex guarding the integer map + FloatMap _floatMap{}; ///< The map storing the float values + mutable MutexPtr m_floatMtx{std::make_unique()}; ///< The mutex guarding the float map + StringMap _stringMap{}; ///< The map storing the string values + mutable MutexPtr m_stringMtx{std::make_unique()}; ///< The mutex guarding the string map }; template GenericDataReturnType GenericParameters::getValue(const std::string& key) const { const auto& map = getMap(); + auto& mtx = getMutex(); + std::lock_guard lock{mtx}; const auto it = map.find(key); // If there is no entry to the key, we just return an empty default // TODO: make this case detectable from the outside @@ -269,11 +304,15 @@ GenericDataReturnType GenericParameters::getValue(const std::string& key) con template void GenericParameters::setValue(const std::string& key, T value) { auto& map = getMap(); + auto& mtx = getMutex(); + if constexpr (detail::isVector) { + std::lock_guard lock{mtx}; map.insert_or_assign(key, std::move(value)); } else { // Wrap the value into a vector with exactly one entry and store that std::vector v = {value}; + std::lock_guard lock{mtx}; map.insert_or_assign(key, std::move(v)); } } @@ -281,6 +320,8 @@ void GenericParameters::setValue(const std::string& key, T value) { template size_t GenericParameters::getN(const std::string& key) const { const auto& map = getMap(); + auto& mtx = getMutex(); + std::lock_guard lock{mtx}; if (const auto it = map.find(key); it != map.end()) { return it->second.size(); } @@ -292,7 +333,11 @@ std::vector GenericParameters::getKeys() const { std::vector keys; const auto& map = getMap(); keys.reserve(map.size()); - std::transform(map.begin(), 
map.end(), std::back_inserter(keys), [](const auto& pair) { return pair.first; }); + { + auto& mtx = getMutex(); + std::lock_guard lock{mtx}; + std::transform(map.begin(), map.end(), std::back_inserter(keys), [](const auto& pair) { return pair.first; }); + } return keys; } diff --git a/include/podio/ROOTFrameData.h b/include/podio/ROOTFrameData.h new file mode 100644 index 000000000..157e3fbea --- /dev/null +++ b/include/podio/ROOTFrameData.h @@ -0,0 +1,70 @@ +#ifndef PODIO_ROOTFRAMEDATA_H +#define PODIO_ROOTFRAMEDATA_H + +#include "podio/CollectionBuffers.h" +#include "podio/CollectionIDTable.h" +#include "podio/GenericParameters.h" + +#include +#include +#include +#include + +namespace podio { + +class ROOTFrameData { + using CollIDPtr = std::shared_ptr; + +public: + using BufferMap = std::unordered_map; + + ROOTFrameData() = delete; + ~ROOTFrameData() = default; + ROOTFrameData(ROOTFrameData&&) = default; + ROOTFrameData& operator=(ROOTFrameData&&) = default; + ROOTFrameData(const ROOTFrameData&) = delete; + ROOTFrameData& operator=(const ROOTFrameData&) = delete; + + ROOTFrameData(BufferMap&& buffers, CollIDPtr&& idTable, podio::GenericParameters&& params) : + m_buffers(std::move(buffers)), m_idTable(idTable), m_parameters(std::move(params)) { + } + + std::optional getCollectionBuffers(const std::string& name) { + const auto bufferHandle = m_buffers.extract(name); + if (bufferHandle.empty()) { + return std::nullopt; + } + + return {bufferHandle.mapped()}; + } + + podio::CollectionIDTable getIDTable() const { + // Construct a copy of the internal table + return {m_idTable->ids(), m_idTable->names()}; + } + + std::unique_ptr getParameters() { + return std::make_unique(std::move(m_parameters)); + } + + std::vector getAvailableCollections() const { + std::vector collections; + collections.reserve(m_buffers.size()); + for (const auto& [name, _] : m_buffers) { + collections.push_back(name); + } + + return collections; + } + +private: + // TODO: switch to something 
more elegant once the basic functionality and + // interface is better defined + BufferMap m_buffers{}; + // This is co-owned by each FrameData and the original reader. (for now at least) + CollIDPtr m_idTable{nullptr}; + podio::GenericParameters m_parameters{}; +}; +} // namespace podio + +#endif // PODIO_ROOTFRAMEDATA_H diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h new file mode 100644 index 000000000..b82f44a87 --- /dev/null +++ b/include/podio/ROOTFrameReader.h @@ -0,0 +1,120 @@ +#ifndef PODIO_ROOTFRAMEREADER_H +#define PODIO_ROOTFRAMEREADER_H + +#include "podio/CollectionBranches.h" +#include "podio/ROOTFrameData.h" +#include "podio/podioVersion.h" + +#include "TChain.h" + +#include +#include +#include +#include +#include +#include + +// forward declarations +class TClass; +// class TChain; +class TFile; +class TTree; + +namespace podio { + +namespace detail { + // Information about the data vector as well as the collection class type + // and the index in the collection branches cache vector + using CollectionInfo = std::tuple; + +} // namespace detail + +class EventStore; +class CollectionBase; +class CollectionIDTable; +class GenericParameters; +struct CollectionReadBuffers; + +/** + * This class has the function to read available data from disk + * and to prepare collections and buffers. + **/ +class ROOTFrameReader { + +public: + ROOTFrameReader() = default; + ~ROOTFrameReader() = default; + + // non-copyable + ROOTFrameReader(const ROOTFrameReader&) = delete; + ROOTFrameReader& operator=(const ROOTFrameReader&) = delete; + + void openFile(const std::string& filename); + + void openFiles(const std::vector& filenames); + + /** + * Read the next data entry from which a Frame can be constructed for the + * given name. In case there are no more entries left for this name or in + * case there is no data for this name, this returns a nullptr. 
+ */ + std::unique_ptr readNextEntry(const std::string& name); + + /// Returns number of entries for the given name + unsigned getEntries(const std::string& name) const; + + podio::version::Version currentFileVersion() const { + return m_fileVersion; + } + +private: + /** + * Helper struct to group together all the necessary state to read / process a + * given category. A "category" in this case describes all frames with the + * same name which are constrained by the ROOT file structure that we use to + * have the same contents. It encapsulates all state that is necessary for + * reading from a TTree / TChain (i.e. collection infos, branches, ...) + */ + struct CategoryInfo { + /// constructor from chain for more convenient map insertion + CategoryInfo(std::unique_ptr&& c) : chain(std::move(c)) { + } + std::unique_ptr chain{nullptr}; ///< The TChain with the data + unsigned entry{0}; ///< The next entry to read + std::vector> storedClasses{}; ///< The stored collections in this + ///< category + std::vector branches{}; ///< The branches for this category + std::shared_ptr table{nullptr}; ///< The collection ID table for this category + }; + + /** + * Initialize the passed CategoryInfo by setting up the necessary branches, + * collection infos and all necessary meta data to be able to read entries + * with this name + */ + void initCategory(CategoryInfo& catInfo, const std::string& name); + + /** + * Get the category information for the given name. 
In case there is no TTree + * with contents for the given name this will return a CategoryInfo with an + * uninitialized chain (nullptr) member + */ + CategoryInfo& getCategoryInfo(const std::string& name); + + GenericParameters readEventMetaData(CategoryInfo& catInfo); + + /** + * Get / read the buffers at index iColl in the passed category information + */ + podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl); + + std::unique_ptr m_metaChain{nullptr}; ///< The metadata tree + std::unordered_map m_categories{}; ///< All categories + std::vector m_availCategories{}; ///< All available categories from this file + + podio::version::Version m_fileVersion{0, 0, 0}; +}; + +} // namespace podio + +#endif // PODIO_ROOTFRAMEREADER_H diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h new file mode 100644 index 000000000..9428ed929 --- /dev/null +++ b/include/podio/ROOTFrameWriter.h @@ -0,0 +1,87 @@ +#ifndef PODIO_ROOTFRAMEWRITER_H +#define PODIO_ROOTFRAMEWRITER_H + +#include "podio/CollectionBranches.h" +#include "podio/CollectionIDTable.h" + +#include "TFile.h" + +#include +#include +#include +#include +#include + +// forward declarations +class TTree; + +namespace podio { +class Frame; +class CollectionBase; +class GenericParameters; + +class ROOTFrameWriter { +public: + ROOTFrameWriter(const std::string& filename); + ~ROOTFrameWriter() = default; + + ROOTFrameWriter(const ROOTFrameWriter&) = delete; + ROOTFrameWriter& operator=(const ROOTFrameWriter&) = delete; + + /** Store the given frame with the given category. Store all available + * collections from the Frame. + * + * NOTE: The contents of the first Frame that is written in this way + * determines the contents that will be written for all subsequent Frames. + */ + void writeFrame(const podio::Frame& frame, const std::string& category); + + /** Store the given Frame with the given category. Store only the + * collections that are passed. 
+ * + * NOTE: The contents of the first Frame that is written in this way + * determines the contents that will be written for all subsequent Frames. + */ + void writeFrame(const podio::Frame& frame, const std::string& category, const std::vector& collsToWrite); + + /** Write the current file, including all the necessary metadata to read it again. + */ + void finish(); + +private: + using StoreCollection = std::pair; + + // collectionID, collectionType, subsetCollection + // NOTE: same as in rootUtils.h private header! + using CollectionInfoT = std::tuple; + + /** + * Helper struct to group together all necessary state to write / process a + * given category. Created during the first writing of a category + */ + struct CategoryInfo { + TTree* tree{nullptr}; ///< The TTree to which this category is written + std::vector branches{}; ///< The branches for this category + std::vector collInfo{}; ///< Collection info for this category + podio::CollectionIDTable idTable{}; ///< The collection id table for this category + std::vector collsToWrite{}; ///< The collections to write for this category + }; + + /// Initialize the branches for this category + void initBranches(CategoryInfo& catInfo, const std::vector& collections, + /*const*/ podio::GenericParameters& parameters); + + /// Get the (potentially uninitialized category information for this category) + CategoryInfo& getCategoryInfo(const std::string& category); + + static void resetBranches(std::vector& branches, + const std::vector& collections, + /*const*/ podio::GenericParameters* parameters); + + std::unique_ptr m_file{nullptr}; ///< The storage file + std::unordered_map m_categories{}; ///< All categories +}; + +} // namespace podio + +#endif // PODIO_ROOTFRAMEWRITER_H diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 09dfa646d..ba33cae16 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -2,7 +2,6 @@ #define PODIO_ROOTREADER_H #include 
"podio/CollectionBranches.h" -#include "podio/ICollectionProvider.h" #include "podio/IReader.h" #include @@ -26,6 +25,7 @@ class CollectionBase; class Registry; class CollectionIDTable; class GenericParameters; + /** This class has the function to read available data from disk and to prepare collections and buffers. diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index ce7ee9937..fdc4bd8a8 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -35,7 +35,11 @@ class SIOBlock : public sio::block { SIOBlock& operator=(const SIOBlock&) = delete; podio::CollectionBase* getCollection() { - return _col; + return m_buffers.createCollection(m_buffers, m_subsetColl).release(); + } + + podio::CollectionReadBuffers getBuffers() const { + return m_buffers; } std::string name() { @@ -43,16 +47,18 @@ class SIOBlock : public sio::block { } void setCollection(podio::CollectionBase* col) { - _col = col; + m_subsetColl = col->isSubsetCollection(); + m_buffers = col->getBuffers(); } virtual SIOBlock* create(const std::string& name) const = 0; // create a new collection for this block - virtual void createCollection(const bool subsetCollection = false) = 0; + virtual void createBuffers(const bool subsetCollection = false) = 0; protected: - podio::CollectionBase* _col{}; + bool m_subsetColl{false}; + podio::CollectionReadBuffers m_buffers{}; }; /** @@ -65,6 +71,15 @@ class SIOCollectionIDTableBlock : public sio::block { SIOCollectionIDTableBlock(podio::EventStore* store); + SIOCollectionIDTableBlock(std::vector&& names, std::vector&& ids, std::vector&& types, + std::vector&& isSubsetColl) : + sio::block("CollectionIDs", sio::version::encode_version(0, 3)), + _names(std::move(names)), + _ids(std::move(ids)), + _types(std::move(types)), + _isSubsetColl(std::move(isSubsetColl)) { + } + SIOCollectionIDTableBlock(const SIOCollectionIDTableBlock&) = delete; SIOCollectionIDTableBlock& operator=(const SIOCollectionIDTableBlock&) = delete; @@ -208,6 +223,13 @@ 
class SIOFileTOCRecord { size_t getNRecords(const std::string& name) const; + /** Get the position of the iEntry-th record with the given name. If no entry + * with the given name is recorded, return 0. Note there is no internal check + * on whether the given name actually has iEntry records. Use getNRecords to + * check for that if necessary. + */ + PositionType getPosition(const std::string& name, unsigned iEntry = 0) const; + private: friend struct SIOFileTOCRecordBlock; @@ -221,6 +243,10 @@ struct SIOFileTOCRecordBlock : public sio::block { SIOFileTOCRecordBlock() : sio::block(sio_helpers::SIOTocRecordName, sio::version::encode_version(0, 1)) { } + SIOFileTOCRecordBlock(SIOFileTOCRecord* r) : + sio::block(sio_helpers::SIOTocRecordName, sio::version::encode_version(0, 1)), record(r) { + } + SIOFileTOCRecordBlock(const SIOFileTOCRecordBlock&) = delete; SIOFileTOCRecordBlock& operator=(const SIOFileTOCRecordBlock&) = delete; diff --git a/include/podio/SIOBlockUserData.h b/include/podio/SIOBlockUserData.h index 208a8e59f..7ce28cd8e 100644 --- a/include/podio/SIOBlockUserData.h +++ b/include/podio/SIOBlockUserData.h @@ -1,6 +1,7 @@ #ifndef PODIO_SIOBLOCKUSERDATA_H #define PODIO_SIOBLOCKUSERDATA_H +#include "podio/CollectionBuffers.h" #include "podio/SIOBlock.h" #include "podio/UserDataCollection.h" @@ -37,30 +38,37 @@ class SIOBlockUserData : public podio::SIOBlock { } void read(sio::read_device& device, sio::version_type /*version*/) override { - auto collBuffers = _col->getBuffers(); - auto* dataVec = collBuffers.dataAsVector(); + auto* dataVec = new std::vector(); unsigned size(0); device.data(size); dataVec->resize(size); podio::handlePODDataSIO(device, &(*dataVec)[0], size); + m_buffers.data = dataVec; } void write(sio::write_device& device) override { - _col->prepareForWrite(); - auto collBuffers = _col->getBuffers(); - auto* dataVec = collBuffers.dataAsVector(); + auto* dataVec = podio::CollectionWriteBuffers::asVector(m_buffers.data); unsigned size = 
dataVec->size(); device.data(size); podio::handlePODDataSIO(device, &(*dataVec)[0], size); } - void createCollection(const bool) override { - setCollection(new podio::UserDataCollection); + void createBuffers(bool) override { + + m_buffers.references = new podio::CollRefCollection(); + m_buffers.vectorMembers = new podio::VectorMembersInfo(); + + // Nothing to do here since UserDataCollections cannot be subset collections + m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool) { + return std::make_unique>(std::move(*buffers.dataAsVector())); + }; } SIOBlock* create(const std::string& name) const override { return new SIOBlockUserData(name); } + +private: }; } // namespace podio diff --git a/include/podio/SIOFrameData.h b/include/podio/SIOFrameData.h new file mode 100644 index 000000000..9cbc8a724 --- /dev/null +++ b/include/podio/SIOFrameData.h @@ -0,0 +1,89 @@ +#ifndef PODIO_SIOFRAMEDATA_H +#define PODIO_SIOFRAMEDATA_H + +#include "podio/CollectionBuffers.h" +#include "podio/CollectionIDTable.h" +#include "podio/GenericParameters.h" +#include "podio/SIOBlock.h" + +#include +#include + +#include +#include +#include +#include +#include + +namespace podio { +/** + * The Frame data container for the SIO backend. It is constructed from the + * compressed sio::buffers that is read from file and does all the necessary + * unpacking and decompressing internally after construction. + */ +class SIOFrameData { + +public: + SIOFrameData() = delete; + ~SIOFrameData() = default; + + SIOFrameData(const SIOFrameData&) = delete; + SIOFrameData& operator=(const SIOFrameData&) = delete; + + SIOFrameData(SIOFrameData&&) = default; + SIOFrameData& operator=(SIOFrameData&&) = default; + + /** + * Constructor from the collBuffers containing the collection data and a + * tableBuffer containing the necessary information for unpacking the + * collections. The two size parameters denote the uncompressed size of the + * respective buffers. 
+ */ + SIOFrameData(sio::buffer&& collBuffers, std::size_t dataSize, sio::buffer&& tableBuffer, std::size_t tableSize) : + m_recBuffer(std::move(collBuffers)), + m_tableBuffer(std::move(tableBuffer)), + m_dataSize(dataSize), + m_tableSize(tableSize) { + } + + std::optional getCollectionBuffers(const std::string& name); + + podio::CollectionIDTable getIDTable() { + if (m_idTable.empty()) { + readIdTable(); + } + return {m_idTable.ids(), m_idTable.names()}; + } + + std::unique_ptr getParameters(); + + std::vector getAvailableCollections(); + +private: + void unpackBuffers(); + + void readIdTable(); + + void createBlocks(); + + // Default initialization doesn't really matter here, because they are made + // the correct size on construction + sio::buffer m_recBuffer{sio::kbyte}; ///< The compressed record (data) buffer + sio::buffer m_tableBuffer{sio::kbyte}; ///< The compressed collection id table buffer + + std::size_t m_dataSize{}; ///< Uncompressed data buffer size + std::size_t m_tableSize{}; ///< Uncompressed table size + + std::vector m_availableBlocks{}; ///< The blocks that have already been retrieved + + sio::block_list m_blocks{}; + + podio::CollectionIDTable m_idTable{}; + std::vector m_typeNames{}; + std::vector m_subsetCollectionBits{}; + + podio::GenericParameters m_parameters{}; +}; +} // namespace podio + +#endif // PODIO_SIOFRAMEDATA_H diff --git a/include/podio/SIOFrameReader.h b/include/podio/SIOFrameReader.h new file mode 100644 index 000000000..241289ee7 --- /dev/null +++ b/include/podio/SIOFrameReader.h @@ -0,0 +1,63 @@ +#ifndef PODIO_SIOFRAMEREADER_H +#define PODIO_SIOFRAMEREADER_H + +#include "podio/SIOBlock.h" +#include "podio/SIOFrameData.h" +#include "podio/podioVersion.h" + +#include + +#include +#include +#include + +namespace podio { + +class CollectionIDTable; + +class SIOFrameReader { + +public: + SIOFrameReader(); + ~SIOFrameReader() = default; + + // non copyable + SIOFrameReader(const SIOFrameReader&) = delete; + SIOFrameReader& 
operator=(const SIOFrameReader&) = delete; + + /** + * Read the next data entry from which a Frame can be constructed for the + * given name. In case there are no more entries left for this name or in + * case there is no data for this name, this returns a nullptr. + */ + std::unique_ptr readNextEntry(const std::string& name); + + /// Returns number of entries for the given name + unsigned getEntries(const std::string& name) const; + + void openFile(const std::string& filename); + + podio::version::Version currentFileVersion() const { + return m_fileVersion; + } + +private: + void readPodioHeader(); + + /// read the TOC record + bool readFileTOCRecord(); + + sio::ifstream m_stream{}; ///< The stream from which we read + + /// Count how many times each an entry of this name has been read already + std::unordered_map m_nameCtr{}; + + /// Table of content record where starting points of named entries can be read from + SIOFileTOCRecord m_tocRecord{}; + /// The podio version that has been used to write the file + podio::version::Version m_fileVersion{0}; +}; + +} // namespace podio + +#endif // PODIO_SIOFRAMEREADER_H diff --git a/include/podio/SIOFrameWriter.h b/include/podio/SIOFrameWriter.h new file mode 100644 index 000000000..1ccc7a2e8 --- /dev/null +++ b/include/podio/SIOFrameWriter.h @@ -0,0 +1,41 @@ +#ifndef PODIO_SIOFRAMEWRITER_H +#define PODIO_SIOFRAMEWRITER_H + +#include "podio/SIOBlock.h" + +#include + +#include +#include +#include + +namespace podio { + +class Frame; + +class SIOFrameWriter { +public: + SIOFrameWriter(const std::string& filename); + ~SIOFrameWriter() = default; + + SIOFrameWriter(const SIOFrameWriter&) = delete; + SIOFrameWriter& operator=(const SIOFrameWriter&) = delete; + + /** Write the given Frame with the given category + */ + void writeFrame(const podio::Frame& frame, const std::string& category); + + /** Write the given Frame with the given category only storing the collections + * that are desired via collsToWrite + */ + void 
writeFrame(const podio::Frame& frame, const std::string& category, const std::vector& collsToWrite); + + void finish(); + +private: + sio::ofstream m_stream{}; ///< The output file stream + SIOFileTOCRecord m_tocRecord{}; ///< The "table of contents" of the written file +}; +} // namespace podio + +#endif // PODIO_SIOFRAMEWRITER_H diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 461004e80..2365c5094 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -2,6 +2,7 @@ #define PODIO_USERDATACOLLECTION_H #include "podio/CollectionBase.h" +#include "podio/CollectionBuffers.h" #include "podio/utilities/TypeHelpers.h" #include @@ -68,6 +69,9 @@ class UserDataCollection : public CollectionBase { public: UserDataCollection() = default; + /// Constructor from an existing vector (which will be moved from!) + UserDataCollection(std::vector&& vec) : _vec(std::move(vec)) { + } UserDataCollection(const UserDataCollection&) = delete; UserDataCollection& operator=(const UserDataCollection&) = delete; UserDataCollection(UserDataCollection&&) = default; @@ -98,11 +102,21 @@ class UserDataCollection : public CollectionBase { } /// Get the collection buffers for this collection - podio::CollectionBuffers getBuffers() override { + podio::CollectionWriteBuffers getBuffers() override { _vecPtr = &_vec; // Set the pointer to the correct internal vector return {&_vecPtr, &m_refCollections, &m_vecmem_info}; } + podio::CollectionReadBuffers createBuffers() /*const*/ final { + return {nullptr, nullptr, nullptr, + [](podio::CollectionReadBuffers buffers, bool) { + return std::make_unique>(std::move(*buffers.dataAsVector())); + }, + [](podio::CollectionReadBuffers& buffers) { + buffers.data = podio::CollectionWriteBuffers::asVector(buffers.data); + }}; + } + /// check for validity of the container after read bool isValid() const override { return true; diff --git a/include/podio/utilities/TypeHelpers.h 
b/include/podio/utilities/TypeHelpers.h index 452600026..b351e2118 100644 --- a/include/podio/utilities/TypeHelpers.h +++ b/include/podio/utilities/TypeHelpers.h @@ -101,6 +101,16 @@ namespace detail { static constexpr bool isVector = IsVectorHelper::value; } // namespace detail + +// forward declaration to be able to use it below +class CollectionBase; + +/** + * Alias template for checking whether a passed type T inherits from podio::CollectionBase + */ +template +static constexpr bool isCollection = std::is_base_of_v; + } // namespace podio #endif // PODIO_UTILITIES_TYPEHELPERS_H diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index e9f40ce2d..bf7fd9e0d 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -19,6 +19,9 @@ {{ collection_type }}::{{ collection_type }}() : m_isValid(false), m_isPrepared(false), m_isSubsetColl(false), m_collectionID(0), m_storage() {} +{{ collection_type }}::{{ collection_type }}({{ collection_type }}Data&& data, bool isSubsetColl) : + m_isValid(false), m_isPrepared(false), m_isSubsetColl(isSubsetColl), m_collectionID(0), m_storage(std::move(data)) {} + {{ collection_type }}::~{{ collection_type }}() { // Need to tell the storage how to clean-up m_storage.clear(m_isSubsetColl); @@ -142,10 +145,35 @@ void {{ collection_type }}::push_back({{ class.bare_type }} object) { } } -podio::CollectionBuffers {{ collection_type }}::getBuffers() { +podio::CollectionWriteBuffers {{ collection_type }}::getBuffers() { return m_storage.getCollectionBuffers(m_isSubsetColl); } +podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { + // Very cumbersome way at the moment. 
We get the actual buffers to have the + // references and vector members sized appropriately (we will use this + // information to create new buffers outside) + auto collBuffers = m_storage.getCollectionBuffers(m_isSubsetColl); + auto readBuffers = podio::CollectionReadBuffers{}; + readBuffers.references = collBuffers.references; + readBuffers.vectorMembers = collBuffers.vectorMembers; + readBuffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { + {{ collection_type }}Data data(buffers, isSubsetColl); + return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); + }; + readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { + if (buffers.data) { + buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); + } +{% if VectorMembers %} +{% for member in VectorMembers %} + (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); +{% endfor %} +{% endif %} + }; + return readBuffers; +} + #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { j = nlohmann::json::array(); diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 80b963ccb..b6f444e49 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -47,6 +47,7 @@ public: using iterator = {{ class.bare_type }}MutableCollectionIterator; {{ class.bare_type }}Collection(); + {{ class.bare_type }}Collection({{ class.bare_type }}CollectionData&& data, bool isSubsetColl); // This is a move-only type {{ class.bare_type }}Collection(const {{ class.bare_type}}Collection& ) = delete; {{ class.bare_type }}Collection& operator=(const {{ class.bare_type}}Collection& ) = delete; @@ -106,7 +107,10 @@ public: bool setReferences(const podio::ICollectionProvider* collectionProvider) final; /// Get the 
collection buffers for this collection - podio::CollectionBuffers getBuffers() final; + podio::CollectionWriteBuffers getBuffers() final; + + /// Create (empty) collection buffers from which a collection can be constructed + podio::CollectionReadBuffers createBuffers() /*const*/ final; void setID(unsigned ID) final { m_collectionID = ID; diff --git a/python/templates/CollectionData.cc.jinja2 b/python/templates/CollectionData.cc.jinja2 index 5ea6342b2..ddc6f29dd 100644 --- a/python/templates/CollectionData.cc.jinja2 +++ b/python/templates/CollectionData.cc.jinja2 @@ -28,6 +28,23 @@ {% endfor %} } +{{ class_type }}::{{ class_type }}(podio::CollectionReadBuffers buffers, bool isSubsetColl) : +{% for relation in OneToManyRelations + OneToOneRelations %} + m_rel_{{ relation.name }}(new std::vector<{{ relation.namespace }}::{{ relation.bare_type }}>()), +{% endfor %} + m_refCollections(std::move(*buffers.references)), + m_vecmem_info(std::move(*buffers.vectorMembers)) { + // For subset collections we are done, for proper collections we still have to + // populate the data and vector members + if (!isSubsetColl) { + m_data.reset(buffers.dataAsVector<{{ class.full_type }}Data>()); + +{% for member in VectorMembers %} + m_vec_{{ member.name }}.reset(podio::CollectionReadBuffers::asVector<{{ member.full_type }}>(m_vecmem_info[{{ loop.index0 }}].second)); +{% endfor %} + } +} + void {{ class_type }}::clear(bool isSubsetColl) { if (isSubsetColl) { // We don't own the objects so no cleanup to do here @@ -64,7 +81,7 @@ void {{ class_type }}::clear(bool isSubsetColl) { entries.clear(); } -podio::CollectionBuffers {{ class_type }}::getCollectionBuffers(bool isSubsetColl) { +podio::CollectionWriteBuffers {{ class_type }}::getCollectionBuffers(bool isSubsetColl) { {% if VectorMembers %} // Make sure these point to the right place, even if a collection has been // moved since it has been created diff --git a/python/templates/CollectionData.h.jinja2 
b/python/templates/CollectionData.h.jinja2 index 43faaa331..50ae8dd02 100644 --- a/python/templates/CollectionData.h.jinja2 +++ b/python/templates/CollectionData.h.jinja2 @@ -41,6 +41,11 @@ public: */ {{ class_type }}(); + /** + * Constructor from existing I/O buffers + */ + {{ class_type }}(podio::CollectionReadBuffers buffers, bool isSubsetColl); + /** * Non copy-able, move-only class */ @@ -56,7 +61,7 @@ public: void clear(bool isSubsetColl); - podio::CollectionBuffers getCollectionBuffers(bool isSubsetColl); + podio::CollectionWriteBuffers getCollectionBuffers(bool isSubsetColl); void prepareForWrite(bool isSubsetColl); @@ -77,12 +82,12 @@ private: std::vector> m_rel_{{ relation.name }}_tmp{}; ///< Relation buffer for internal book-keeping {% endfor %} {% for relation in OneToOneRelations %} - podio::UVecPtr<{{ relation.namespace }}::{{ relation.bare_type }}> m_rel_{{ relation.name }}; ///< Relation buffer for read / write + podio::UVecPtr<{{ relation.namespace }}::{{ relation.bare_type }}> m_rel_{{ relation.name }}{nullptr}; ///< Relation buffer for read / write {% endfor %} // members to handle vector members {% for member in VectorMembers %} - podio::UVecPtr<{{ member.full_type }}> m_vec_{{ member.name }}; /// combined vector of all objects in collection + podio::UVecPtr<{{ member.full_type }}> m_vec_{{ member.name }}{nullptr}; /// combined vector of all objects in collection std::vector> m_vecs_{{ member.name }}{}; /// pointers to individual member vectors {% endfor %} diff --git a/python/templates/SIOBlock.cc.jinja2 b/python/templates/SIOBlock.cc.jinja2 index 10282d014..a33febb8c 100644 --- a/python/templates/SIOBlock.cc.jinja2 +++ b/python/templates/SIOBlock.cc.jinja2 @@ -5,6 +5,8 @@ #include "{{ incfolder }}{{ class.bare_type }}SIOBlock.h" #include "{{ incfolder }}{{ class.bare_type }}Collection.h" +#include "podio/CollectionBuffers.h" + #include #include #include @@ -13,17 +15,25 @@ {% with block_class = class.bare_type + 'SIOBlock' %} void {{ 
block_class }}::read(sio::read_device& device, sio::version_type) { - auto collBuffers = _col->getBuffers(); - if (not _col->isSubsetCollection()) { - auto* dataVec = collBuffers.dataAsVector<{{ class.full_type }}Data>(); + if (m_subsetColl) { + m_buffers.references->emplace_back(std::make_unique>()); + } else { +{% for relation in OneToManyRelations + OneToOneRelations %} + m_buffers.references->emplace_back(std::make_unique>()); +{% endfor %} + } + + if (not m_subsetColl) { unsigned size(0); device.data( size ); - dataVec->resize(size); + m_buffers.data = new std::vector<{{ class.full_type }}Data>(size); + auto* dataVec = m_buffers.dataAsVector<{{ class.full_type }}Data>(); podio::handlePODDataSIO(device, dataVec->data(), size); + // m_buffers.data = dataVec; } //---- read ref collections ----- - auto* refCols = collBuffers.references; + auto* refCols = m_buffers.references; for( auto& refC : *refCols ){ unsigned size{0}; device.data( size ) ; @@ -32,8 +42,13 @@ void {{ block_class }}::read(sio::read_device& device, sio::version_type) { } {% if VectorMembers %} +{% for member in VectorMembers %} + // auto {{ member.name }}Buffers = new std::vector<{{ member.full_type }}>(); + // m_buffers.vectorMembers->emplace_back("{{ member.full_type }}", &{{ member.name }}Buffers); + m_buffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>()); +{% endfor %} //---- read vector members - auto* vecMemInfo = collBuffers.vectorMembers; + auto* vecMemInfo = m_buffers.vectorMembers; unsigned size{0}; {% for member in VectorMembers %} @@ -43,17 +58,15 @@ void {{ block_class }}::read(sio::read_device& device, sio::version_type) { } void {{ block_class }}::write(sio::write_device& device) { - _col->prepareForWrite() ; - auto collBuffers = _col->getBuffers(); - if (not _col->isSubsetCollection()) { - auto* dataVec = collBuffers.dataAsVector<{{ class.full_type }}Data>(); + if (not m_subsetColl) { + auto* dataVec = 
podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(m_buffers.data); unsigned size = dataVec->size() ; device.data( size ) ; podio::handlePODDataSIO( device , dataVec->data(), size ) ; } //---- write ref collections ----- - auto* refCols = collBuffers.references; + auto* refCols = m_buffers.references; for( auto& refC : *refCols ){ unsigned size = refC->size() ; device.data( size ) ; @@ -62,7 +75,7 @@ void {{ block_class }}::write(sio::write_device& device) { {% if VectorMembers %} //---- write vector members - auto* vecMemInfo = collBuffers.vectorMembers; + auto* vecMemInfo = m_buffers.vectorMembers; unsigned size{0}; {% for member in VectorMembers %} @@ -71,9 +84,18 @@ void {{ block_class }}::write(sio::write_device& device) { {% endif %} } -void {{ block_class }}::createCollection(const bool subsetCollection) { - setCollection(new {{ class.bare_type }}Collection); - _col->setSubsetCollection(subsetCollection); +void {{ block_class }}::createBuffers(bool subsetColl) { + m_subsetColl = subsetColl; + + + + m_buffers.references = new podio::CollRefCollection(); + m_buffers.vectorMembers = new podio::VectorMembersInfo(); + + m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { + {{ class.bare_type }}CollectionData data(buffers, isSubsetColl); + return std::make_unique<{{ class.bare_type }}Collection>(std::move(data), isSubsetColl); + }; } {% endwith %} diff --git a/python/templates/SIOBlock.h.jinja2 b/python/templates/SIOBlock.h.jinja2 index 1ea55d69e..62d210852 100644 --- a/python/templates/SIOBlock.h.jinja2 +++ b/python/templates/SIOBlock.h.jinja2 @@ -13,6 +13,10 @@ #include #include +namespace podio { + struct CollectionReadBuffers; +} + {{ utils.namespace_open(class.namespace) }} {% with block_class = class.bare_type + 'SIOBlock' %} @@ -27,15 +31,18 @@ public: // SIOBlock(name + "__{{ class.bare_type }}", sio::version::encode_version(0, 1)) {} SIOBlock(name, sio::version::encode_version(0, 1)) {} - // Read the 
particle data from the device - virtual void read(sio::read_device& device, sio::version_type version) override; + // Read the collection data from the device + void read(sio::read_device& device, sio::version_type version) override; - // Write the particle data to the device - virtual void write(sio::write_device& device) override; + // Write the collection data to the device + void write(sio::write_device& device) override; - virtual void createCollection(const bool subsetCollection=false) override; + void createBuffers(bool isSubsetColl) override; SIOBlock* create(const std::string& name) const override { return new {{ block_class }}(name); } + +private: + podio::CollectionReadBuffers createBuffers() const; }; static {{ block_class }} _dummy{{ block_class }}; diff --git a/python/templates/macros/sioblocks.jinja2 b/python/templates/macros/sioblocks.jinja2 index 379aceae7..b3b903f8b 100644 --- a/python/templates/macros/sioblocks.jinja2 +++ b/python/templates/macros/sioblocks.jinja2 @@ -8,9 +8,9 @@ {% macro vector_member_read(member, index) %} - auto* vec{{ index }} = *reinterpret_cast**>(vecMemInfo->at({{ index }}).second); + auto* vec{{ index }} = reinterpret_cast*>(vecMemInfo->at({{ index }}).second); size = 0u; device.data(size); vec{{ index }}->resize(size); - podio::handlePODDataSIO(device, &(*vec{{ index }})[0], size); + podio::handlePODDataSIO(device, vec{{ index }}->data(), size); {% endmacro %} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9c4b5bd87..696831613 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,8 @@ SET(root_sources rootUtils.h ROOTWriter.cc ROOTReader.cc + ROOTFrameWriter.cc + ROOTFrameReader.cc ) SET(sio_sources @@ -18,6 +20,9 @@ SET(sio_sources SIOWriter.cc SIOBlockUserData.cc SIOBlock.cc + SIOFrameWriter.cc + SIOFrameReader.cc + SIOFrameData.cc ) SET(python_sources diff --git a/src/CollectionIDTable.cc b/src/CollectionIDTable.cc index 7d25f63d4..f999af077 100644 --- a/src/CollectionIDTable.cc +++ 
b/src/CollectionIDTable.cc @@ -6,21 +6,21 @@ namespace podio { const std::string CollectionIDTable::name(int ID) const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_collectionIDs), end(m_collectionIDs), ID); const auto index = std::distance(m_collectionIDs.begin(), result); return m_names[index]; } int CollectionIDTable::collectionID(const std::string& name) const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); const auto index = std::distance(m_names.begin(), result); return m_collectionIDs[index]; } void CollectionIDTable::print() const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(*m_mutex); std::cout << "CollectionIDTable" << std::endl; for (unsigned i = 0; i < m_names.size(); ++i) { std::cout << "\t" << m_names[i] << " : " << m_collectionIDs[i] << std::endl; @@ -28,13 +28,13 @@ void CollectionIDTable::print() const { } bool CollectionIDTable::present(const std::string& name) const { - std::lock_guard lock(m_mutex); + std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); return result != end(m_names); } int CollectionIDTable::add(const std::string& name) { - std::lock_guard lock(m_mutex); + std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); int ID = 0; if (result == m_names.end()) { diff --git a/src/GenericParameters.cc b/src/GenericParameters.cc index 654f4dd61..4cf48d550 100644 --- a/src/GenericParameters.cc +++ b/src/GenericParameters.cc @@ -5,6 +5,23 @@ namespace podio { +GenericParameters::GenericParameters(const GenericParameters& other) : + m_intMtx(std::make_unique()), + m_floatMtx(std::make_unique()), + m_stringMtx(std::make_unique()) { + { + // acquire all three locks at once to make sure all three internal maps are + // copied at the same "state" of the GenericParameters + auto& intMtx = 
other.getMutex(); + auto& floatMtx = other.getMutex(); + auto& stringMtx = other.getMutex(); + std::scoped_lock lock(intMtx, floatMtx, stringMtx); + _intMap = other._intMap; + _floatMap = other._floatMap; + _stringMap = other._stringMap; + } +} + int GenericParameters::getIntVal(const std::string& key) const { return getValue(key); } diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc new file mode 100644 index 000000000..30cbda383 --- /dev/null +++ b/src/ROOTFrameReader.cc @@ -0,0 +1,275 @@ +#include "podio/ROOTFrameReader.h" +#include "podio/CollectionBase.h" +#include "podio/CollectionBuffers.h" +#include "podio/CollectionIDTable.h" +#include "podio/GenericParameters.h" +#include "rootUtils.h" + +// ROOT specific includes +#include "TChain.h" +#include "TClass.h" +#include "TFile.h" +#include "TTree.h" +#include "TTreeCache.h" + +#include + +namespace podio { + +std::tuple, std::vector>> +createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, + const std::vector& collInfo); + +GenericParameters ROOTFrameReader::readEventMetaData(ROOTFrameReader::CategoryInfo& catInfo) { + // Parameter branch is always the last one + auto& paramBranches = catInfo.branches.back(); + auto* branch = paramBranches.data; + + GenericParameters params; + auto* emd = &params; + branch->SetAddress(&emd); + branch->GetEntry(catInfo.entry); + return params; +} + +std::unique_ptr ROOTFrameReader::readNextEntry(const std::string& name) { + auto& catInfo = getCategoryInfo(name); + if (!catInfo.chain) { + return nullptr; + } + if (catInfo.entry >= catInfo.chain->GetEntries()) { + return nullptr; + } + + ROOTFrameData::BufferMap buffers; + for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) { + buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i)); + } + + auto parameters = readEventMetaData(catInfo); + + catInfo.entry++; + return std::make_unique(std::move(buffers), catInfo.table, std::move(parameters)); +} +
+podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, + size_t iColl) { + const auto& name = catInfo.storedClasses[iColl].first; + const auto& [theClass, collectionClass, index] = catInfo.storedClasses[iColl].second; + auto& branches = catInfo.branches[index]; + + // Create empty collection buffers, and connect them to the right branches + auto collBuffers = podio::CollectionReadBuffers(); + // If we have a valid data buffer class we know that have to read data, + // otherwise we are handling a subset collection + const bool isSubsetColl = theClass == nullptr; + if (!isSubsetColl) { + collBuffers.data = theClass->New(); + } + + { + auto collection = + std::unique_ptr(static_cast(collectionClass->New())); + collection->setSubsetCollection(isSubsetColl); + + auto tmpBuffers = collection->createBuffers(); + collBuffers.createCollection = std::move(tmpBuffers.createCollection); + collBuffers.recast = std::move(tmpBuffers.recast); + + if (auto* refs = tmpBuffers.references) { + collBuffers.references = new podio::CollRefCollection(refs->size()); + } + if (auto* vminfo = tmpBuffers.vectorMembers) { + collBuffers.vectorMembers = new podio::VectorMembersInfo(); + collBuffers.vectorMembers->reserve(vminfo->size()); + + for (const auto& [type, _] : (*vminfo)) { + const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); + collBuffers.vectorMembers->emplace_back(type, vecClass->New()); + } + } + } + + const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); + // After switching trees in the chain, branch pointers get invalidated so + // they need to be reassigned. 
+ // NOTE: root 6.22/06 requires that we get completely new branches here, + // with 6.20/04 we could just re-set them + if (localEntry == 0) { + branches.data = root_utils::getBranch(catInfo.chain.get(), name.c_str()); + + // reference collections + if (auto* refCollections = collBuffers.references) { + for (size_t i = 0; i < refCollections->size(); ++i) { + const auto brName = root_utils::refBranch(name, i); + branches.refs[i] = root_utils::getBranch(catInfo.chain.get(), brName.c_str()); + } + } + + // vector members + if (auto* vecMembers = collBuffers.vectorMembers) { + for (size_t i = 0; i < vecMembers->size(); ++i) { + const auto brName = root_utils::vecBranch(name, i); + branches.vecs[i] = root_utils::getBranch(catInfo.chain.get(), brName.c_str()); + } + } + } + + // set the addresses and read the data + root_utils::setCollectionAddresses(collBuffers, branches); + root_utils::readBranchesData(branches, localEntry); + + collBuffers.recast(collBuffers); + + return collBuffers; +} + +ROOTFrameReader::CategoryInfo& ROOTFrameReader::getCategoryInfo(const std::string& category) { + if (auto it = m_categories.find(category); it != m_categories.end()) { + // Use the id table as proxy to check whether this category has been + // initialized already + if (it->second.table == nullptr) { + initCategory(it->second, category); + } + return it->second; + } + + // Use a nullptr TChain to signify an invalid category request + // TODO: Warn / log + static auto invalidCategory = CategoryInfo{nullptr}; + + return invalidCategory; +} + +void ROOTFrameReader::initCategory(CategoryInfo& catInfo, const std::string& category) { + catInfo.table = std::make_shared(); + auto* table = catInfo.table.get(); + auto* tableBranch = root_utils::getBranch(m_metaChain.get(), root_utils::idTableName(category)); + tableBranch->SetAddress(&table); + tableBranch->GetEntry(0); + + auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); + auto collInfo =
new std::vector(); + collInfoBranch->SetAddress(&collInfo); + collInfoBranch->GetEntry(0); + + std::tie(catInfo.branches, catInfo.storedClasses) = + createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); + + delete collInfo; + + // Finally set up the branches for the parameters + root_utils::CollectionBranches paramBranches{}; + paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + catInfo.branches.push_back(paramBranches); +} + +std::vector getAvailableCategories(TChain* metaChain) { + auto* branches = metaChain->GetListOfBranches(); + std::vector brNames; + brNames.reserve(branches->GetEntries()); + for (int i = 0; i < branches->GetEntries(); ++i) { + const std::string name = branches->At(i)->GetName(); + const auto fUnder = name.find("___"); + if (fUnder != std::string::npos) { + brNames.emplace_back(name.substr(0, fUnder)); + } + } + + std::sort(brNames.begin(), brNames.end()); + brNames.erase(std::unique(brNames.begin(), brNames.end()), brNames.end()); + + return brNames; +} + +void ROOTFrameReader::openFile(const std::string& filename) { + openFiles({filename}); +} + +void ROOTFrameReader::openFiles(const std::vector& filenames) { + m_metaChain = std::make_unique(root_utils::metaTreeName); + // NOTE: We simply assume that the meta data doesn't change throughout the + // chain! This essentially boils down to the assumption that all files that + // are read this way were written with the same settings. + m_metaChain->Add(filenames[0].c_str()); + + podio::version::Version* versionPtr{nullptr}; + if (auto* versionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::versionBranchName)) { + versionBranch->SetAddress(&versionPtr); + versionBranch->GetEntry(0); + } + m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; + delete versionPtr; + + // Do some work up front for setting up categories and setup all the chains + // and record the available categories.
The rest of the setup follows on + // demand when the category is first read + m_availCategories = getAvailableCategories(m_metaChain.get()); + for (const auto& cat : m_availCategories) { + auto [it, _] = m_categories.try_emplace(cat, std::make_unique(cat.c_str())); + for (const auto& fn : filenames) { + it->second.chain->Add(fn.c_str()); + } + } +} + +unsigned ROOTFrameReader::getEntries(const std::string& name) const { + if (auto it = m_categories.find(name); it != m_categories.end()) { + return it->second.chain->GetEntries(); + } + + return 0; +} + +std::tuple, std::vector>> +createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, + const std::vector& collInfo) { + + size_t collectionIndex{0}; + std::vector collBranches; + collBranches.reserve(collInfo.size() + 1); + std::vector> storedClasses; + storedClasses.reserve(collInfo.size()); + + for (const auto& [collID, collType, isSubsetColl] : collInfo) { + // We only write collections that are in the collectionIDTable, so no need + // to check here + const auto name = idTable.name(collID); + + root_utils::CollectionBranches branches{}; + const auto collectionClass = TClass::GetClass(collType.c_str()); + + // Need the collection here to setup all the branches. 
Have to manage the + // temporary collection ourselves + auto collection = + std::unique_ptr(static_cast(collectionClass->New())); + collection->setSubsetCollection(isSubsetColl); + + if (!isSubsetColl) { + // This branch is guaranteed to exist since only collections that are + // also written to file are in the info metadata that we work with here + branches.data = root_utils::getBranch(chain, name.c_str()); + } + + const auto buffers = collection->getBuffers(); + for (size_t i = 0; i < buffers.references->size(); ++i) { + const auto brName = root_utils::refBranch(name, i); + branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + } + + for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) { + const auto brName = root_utils::vecBranch(name, i); + branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + } + + const std::string bufferClassName = "std::vector<" + collection->getDataTypeName() + ">"; + const auto bufferClass = isSubsetColl ? nullptr : TClass::GetClass(bufferClassName.c_str()); + + storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); + collBranches.push_back(branches); + } + + return {collBranches, storedClasses}; +} + +} // namespace podio diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc new file mode 100644 index 000000000..39708a096 --- /dev/null +++ b/src/ROOTFrameWriter.cc @@ -0,0 +1,139 @@ +#include "podio/ROOTFrameWriter.h" +#include "podio/CollectionBase.h" +#include "podio/Frame.h" +#include "podio/GenericParameters.h" +#include "podio/podioVersion.h" + +#include "rootUtils.h" + +#include "TTree.h" + +namespace podio { + +ROOTFrameWriter::ROOTFrameWriter(const std::string& filename) { + m_file = std::make_unique(filename.c_str(), "recreate"); +} + +void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category) { + writeFrame(frame, category, frame.getAvailableCollections()); +} + +void ROOTFrameWriter::writeFrame(const 
podio::Frame& frame, const std::string& category, + const std::vector& collsToWrite) { + auto& catInfo = getCategoryInfo(category); + // Use the TTree as proxy here to decide whether this category has already + // been initialized + if (catInfo.tree == nullptr) { + catInfo.idTable = frame.getCollectionIDTableForWrite(); + catInfo.collsToWrite = collsToWrite; + catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str()); + catInfo.tree->SetDirectory(m_file.get()); + } + + std::vector collections; + collections.reserve(catInfo.collsToWrite.size()); + for (const auto& name : catInfo.collsToWrite) { + auto* coll = frame.getCollectionForWrite(name); + collections.emplace_back(name, const_cast(coll)); + } + + // We will at least have a parameters branch, even if there are no + // collections + if (catInfo.branches.empty()) { + initBranches(catInfo, collections, const_cast(frame.getGenericParametersForWrite())); + + } else { + resetBranches(catInfo.branches, collections, + &const_cast(frame.getGenericParametersForWrite())); + } + + catInfo.tree->Fill(); +} + +ROOTFrameWriter::CategoryInfo& ROOTFrameWriter::getCategoryInfo(const std::string& category) { + if (auto it = m_categories.find(category); it != m_categories.end()) { + return it->second; + } + + auto [it, _] = m_categories.try_emplace(category, CategoryInfo{}); + return it->second; +} + +void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vector& collections, + /*const*/ podio::GenericParameters& parameters) { + catInfo.branches.reserve(collections.size() + 1); // collections + parameters + + // First collections + for (auto& [name, coll] : collections) { + root_utils::CollectionBranches branches; + const auto buffers = coll->getBuffers(); + + // data buffer branch, only for non-subset collections + if (buffers.data) { + auto bufferDataType = "vector<" + coll->getDataTypeName() + ">"; + branches.data = catInfo.tree->Branch(name.c_str(), bufferDataType.c_str(), buffers.data); + } + 
+ // reference collections + if (auto refColls = buffers.references) { + int i = 0; + for (auto& c : (*refColls)) { + const auto brName = root_utils::refBranch(name, i++); + branches.refs.push_back(catInfo.tree->Branch(brName.c_str(), c.get())); + } + } + + // vector members + if (auto vmInfo = buffers.vectorMembers) { + int i = 0; + for (auto& [type, vec] : (*vmInfo)) { + const auto typeName = "vector<" + type + ">"; + const auto brName = root_utils::vecBranch(name, i++); + branches.vecs.push_back(catInfo.tree->Branch(brName.c_str(), typeName.c_str(), vec)); + } + } + + catInfo.branches.push_back(branches); + catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name), coll->getTypeName(), coll->isSubsetCollection()); + } + + // Also make branches for the parameters + root_utils::CollectionBranches branches; + branches.data = catInfo.tree->Branch(root_utils::paramBranchName, &parameters); + catInfo.branches.push_back(branches); +} + +void ROOTFrameWriter::resetBranches(std::vector& branches, + const std::vector& collections, + /*const*/ podio::GenericParameters* parameters) { + size_t iColl = 0; + for (auto& coll : collections) { + const auto& collBranches = branches[iColl]; + root_utils::setCollectionAddresses(coll.second->getBuffers(), collBranches); + iColl++; + } + + branches.back().data->SetAddress(&parameters); +} + +void ROOTFrameWriter::finish() { + auto* metaTree = new TTree(root_utils::metaTreeName, "metadata tree for podio I/O functionality"); + metaTree->SetDirectory(m_file.get()); + + // Store the collection id table and collection info for reading in the meta tree + for (/*const*/ auto& [category, info] : m_categories) { + metaTree->Branch(root_utils::idTableName(category).c_str(), &info.idTable); + metaTree->Branch(root_utils::collInfoName(category).c_str(), &info.collInfo); + } + + // Store the current podio build version into the meta data tree + auto podioVersion = podio::version::build_version; + metaTree->Branch(root_utils::versionBranchName,
&podioVersion); + + metaTree->Fill(); + + m_file->Write(); + m_file->Close(); +} + +} // namespace podio diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index d5ae89775..c34cef9c0 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -109,7 +109,7 @@ CollectionBase* ROOTReader::getCollection(const std::pairgetBuffers(), branches); return readCollectionData(branches, collection, localEntry, name); } diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 2a8b09bfd..46842b370 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -90,7 +90,7 @@ void ROOTWriter::setBranches(const std::vector& collections) { size_t iCollection = 0; for (auto& coll : collections) { const auto& branches = m_collectionBranches[iCollection]; - root_utils::setCollectionAddresses(coll.second, branches); + root_utils::setCollectionAddresses(coll.second->getBuffers(), branches); iCollection++; } @@ -111,6 +111,7 @@ void ROOTWriter::finish() { // No check necessary, only registered collections possible m_store->get(name, coll); const auto collType = coll->getTypeName(); + // const auto collType = "std::vector<" + coll->getDataTypeName() + ">"; collectionInfo.emplace_back(collID, std::move(collType), coll->isSubsetCollection()); } diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index db2b777af..c521b694c 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -113,12 +113,12 @@ void SIONumberedMetaDataBlock::write(sio::write_device& device) { } std::shared_ptr SIOBlockFactory::createBlock(const std::string& typeStr, const std::string& name, - const bool isRefColl) const { + const bool isSubsetColl) const { const auto it = _map.find(typeStr); if (it != _map.end()) { auto blk = std::shared_ptr(it->second->create(name)); - blk->createCollection(isRefColl); + blk->createBuffers(isSubsetColl); return blk; } else { return nullptr; @@ -210,6 +210,18 @@ size_t SIOFileTOCRecord::getNRecords(const std::string& name) const { return 0; } +SIOFileTOCRecord::PositionType 
SIOFileTOCRecord::getPosition(const std::string& name, unsigned iEntry) const { + const auto it = std::find_if(m_recordMap.cbegin(), m_recordMap.cend(), + [&name](const auto& keyVal) { return keyVal.first == name; }); + if (it != m_recordMap.end()) { + if (iEntry < it->second.size()) { + return it->second[iEntry]; + } + } + + return 0; +} + void SIOFileTOCRecordBlock::read(sio::read_device& device, sio::version_type) { int size; device.data(size); diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc new file mode 100644 index 000000000..e7c87e9e6 --- /dev/null +++ b/src/SIOFrameData.cc @@ -0,0 +1,98 @@ +#include "podio/SIOFrameData.h" +#include "podio/SIOBlock.h" + +#include + +#include +#include + +namespace podio { +std::optional SIOFrameData::getCollectionBuffers(const std::string& name) { + unpackBuffers(); + + if (m_idTable.present(name)) { + // The collections that we read are not necessarily in the same order as + // they are in the collection id table. Hence, we cannot simply use the + // collection ID to index into the blocks + const auto& names = m_idTable.names(); + const auto nameIt = std::find(std::begin(names), std::end(names), name); + // collection indices start at 1! + const auto index = std::distance(std::begin(names), nameIt) + 1; + + m_availableBlocks[index] = 1; + return {dynamic_cast(m_blocks[index].get())->getBuffers()}; + } + + return std::nullopt; +} + +std::unique_ptr SIOFrameData::getParameters() { + unpackBuffers(); + m_availableBlocks[0] = 0; + return std::make_unique(std::move(m_parameters)); +} + +std::vector SIOFrameData::getAvailableCollections() { + unpackBuffers(); + std::vector collections; + for (size_t i = 0; i < m_blocks.size(); ++i) { + if (m_availableBlocks[i]) { + collections.push_back(m_idTable.name(i)); + } + } + + return collections; +} + +void SIOFrameData::unpackBuffers() { + // Only do the unpacking once. 
Use the block as proxy for deciding whether + // we have already unpacked things, since that is the main thing we do in + // here: create blocks and read the data into them + if (!m_blocks.empty()) { + return; + } + + if (m_idTable.empty()) { + readIdTable(); + } + + createBlocks(); + + sio::zlib_compression compressor; + sio::buffer uncBuffer{m_dataSize}; + compressor.uncompress(m_recBuffer.span(), uncBuffer); + sio::api::read_blocks(uncBuffer.span(), m_blocks); +} + +void SIOFrameData::createBlocks() { + m_blocks.reserve(m_typeNames.size() + 1); + // First block during writing is parameters / metadata, then collections + auto parameters = std::make_shared(); + parameters->metadata = &m_parameters; + m_blocks.push_back(parameters); + + for (size_t i = 0; i < m_typeNames.size(); ++i) { + const bool subsetColl = !m_subsetCollectionBits.empty() && m_subsetCollectionBits[i]; + auto blk = podio::SIOBlockFactory::instance().createBlock(m_typeNames[i], m_idTable.names()[i], subsetColl); + m_blocks.push_back(blk); + } + + m_availableBlocks.resize(m_blocks.size(), 1); +} + +void SIOFrameData::readIdTable() { + sio::buffer uncBuffer{m_tableSize}; + sio::zlib_compression compressor; + compressor.uncompress(m_tableBuffer.span(), uncBuffer); + + sio::block_list blocks; + blocks.emplace_back(std::make_shared()); + sio::api::read_blocks(uncBuffer.span(), blocks); + + auto* idTableBlock = static_cast(blocks[0].get()); + m_idTable = std::move(*idTableBlock->getTable()); + m_typeNames = idTableBlock->getTypeNames(); + m_subsetCollectionBits = idTableBlock->getSubsetCollectionBits(); +} + +} // namespace podio diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc new file mode 100644 index 000000000..5b82f216d --- /dev/null +++ b/src/SIOFrameReader.cc @@ -0,0 +1,113 @@ +#include "podio/SIOFrameReader.h" +#include "podio/SIOBlock.h" + +#include +#include +#include + +#include + +namespace podio { + +namespace sio_utils { + // Read the record into a buffer and potentially 
uncompress it + std::pair readRecord(sio::ifstream& stream, bool decompress = true, + std::size_t initBufferSize = sio::mbyte) { + sio::record_info recInfo; + sio::buffer infoBuffer{sio::max_record_info_len}; + sio::buffer recBuffer{initBufferSize}; + sio::api::read_record_info(stream, recInfo, infoBuffer); + sio::api::read_record_data(stream, recInfo, recBuffer); + + if (decompress) { + sio::buffer uncBuffer{recInfo._uncompressed_length}; + sio::zlib_compression compressor; + compressor.uncompress(recBuffer.span(), uncBuffer); + return std::make_pair(std::move(uncBuffer), recInfo); + } + + return std::make_pair(std::move(recBuffer), recInfo); + } +} // namespace sio_utils + +SIOFrameReader::SIOFrameReader() { + auto& libLoader [[maybe_unused]] = SIOBlockLibraryLoader::instance(); +} + +void SIOFrameReader::openFile(const std::string& filename) { + m_stream.open(filename, std::ios::binary); + if (!m_stream.is_open()) { + SIO_THROW(sio::error_code::not_open, "Cannot open input file '" + filename + "' for reading"); + } + + // NOTE: reading TOC record first because that jumps back to the start of the file! 
+ readFileTOCRecord(); + readPodioHeader(); +} + +std::unique_ptr SIOFrameReader::readNextEntry(const std::string& name) { + // Skip to where the next record of this name starts in the file, based on + // how many times we have already read this name + // + // NOTE: exploiting the fact that the operator[] of a map will create a + // default initialized entry for us if not present yet + const auto recordPos = m_tocRecord.getPosition(name, m_nameCtr[name]); + if (recordPos == 0) { + return nullptr; + } + m_stream.seekg(recordPos); + + auto [tableBuffer, tableInfo] = sio_utils::readRecord(m_stream, false); + auto [dataBuffer, dataInfo] = sio_utils::readRecord(m_stream, false); + + m_nameCtr[name]++; + + return std::make_unique(std::move(dataBuffer), dataInfo._uncompressed_length, std::move(tableBuffer), + tableInfo._uncompressed_length); +} + +unsigned SIOFrameReader::getEntries(const std::string& name) const { + return m_tocRecord.getNRecords(name); +} + +bool SIOFrameReader::readFileTOCRecord() { + // Check if there is a dedicated marker at the end of the file that tells us + // where the TOC actually starts + m_stream.seekg(-sio_helpers::SIOTocInfoSize, std::ios_base::end); + uint64_t firstWords{0}; + m_stream.read(reinterpret_cast(&firstWords), sizeof(firstWords)); + + const uint32_t marker = (firstWords >> 32) & 0xffffffff; + if (marker == sio_helpers::SIOTocMarker) { + const uint32_t position = firstWords & 0xffffffff; + m_stream.seekg(position); + + const auto& [uncBuffer, _] = sio_utils::readRecord(m_stream); + + sio::block_list blocks; + auto tocBlock = std::make_shared(); + tocBlock->record = &m_tocRecord; + blocks.push_back(tocBlock); + + sio::api::read_blocks(uncBuffer.span(), blocks); + + m_stream.seekg(0); + return true; + } + + m_stream.clear(); + m_stream.seekg(0); + return false; +} + +void SIOFrameReader::readPodioHeader() { + const auto& [buffer, _] = sio_utils::readRecord(m_stream, false, sizeof(podio::version::Version)); + + sio::block_list 
blocks; + blocks.emplace_back(std::make_shared()); + sio::api::read_blocks(buffer.span(), blocks); + + m_fileVersion = static_cast(blocks[0].get())->version; +} + +} // namespace podio diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc new file mode 100644 index 000000000..b1c975037 --- /dev/null +++ b/src/SIOFrameWriter.cc @@ -0,0 +1,136 @@ +#include "podio/SIOFrameWriter.h" +#include "podio/CollectionBase.h" +#include "podio/CollectionIDTable.h" +#include "podio/Frame.h" +#include "podio/GenericParameters.h" +#include "podio/SIOBlock.h" + +#include +#include +#include +#include + +#include + +namespace podio { + +namespace sio_utils { + using StoreCollection = std::pair; + + std::shared_ptr createCollIDBlock(const std::vector& collections, + const podio::CollectionIDTable& collIdTable) { + // Need to make sure that the type names and subset collection bits are in + // the same order here! + std::vector types; + types.reserve(collections.size()); + std::vector subsetColl; + subsetColl.reserve(collections.size()); + std::vector names; + names.reserve(collections.size()); + std::vector ids; + ids.reserve(collections.size()); + + for (const auto& [name, coll] : collections) { + names.emplace_back(name); + ids.emplace_back(collIdTable.collectionID(name)); + types.emplace_back(coll->getValueTypeName()); + subsetColl.emplace_back(coll->isSubsetCollection()); + } + + return std::make_shared(std::move(names), std::move(ids), std::move(types), + std::move(subsetColl)); + } + + sio::block_list createBlocks(const std::vector& collections, + const podio::GenericParameters& parameters) { + sio::block_list blocks; + blocks.reserve(collections.size() + 1); // parameters + collections + + auto paramBlock = std::make_shared(); + // TODO: get rid of const_cast + paramBlock->metadata = const_cast(&parameters); + blocks.emplace_back(std::move(paramBlock)); + + for (const auto& [name, col] : collections) { + blocks.emplace_back(podio::SIOBlockFactory::instance().createBlock(col,
name)); + } + + return blocks; + } + + // Write the passed record and return where it starts in the file + sio::ifstream::pos_type writeRecord(const sio::block_list& blocks, const std::string& recordName, + sio::ofstream& stream, std::size_t initBufferSize = sio::mbyte, + bool compress = true) { + auto buffer = sio::buffer{initBufferSize}; + auto recInfo = sio::api::write_record(recordName, buffer, blocks, 0); + + if (compress) { + // use zlib to compress the record into another buffer + sio::zlib_compression compressor; + compressor.set_level(6); // Z_DEFAULT_COMPRESSION==6 + auto comBuffer = sio::buffer{initBufferSize}; + sio::api::compress_record(recInfo, buffer, comBuffer, compressor); + + sio::api::write_record(stream, buffer.span(0, recInfo._header_length), comBuffer.span(), recInfo); + } else { + sio::api::write_record(stream, buffer.span(), recInfo); + } + + return recInfo._file_start; + } +} // namespace sio_utils + +SIOFrameWriter::SIOFrameWriter(const std::string& filename) { + m_stream.open(filename, std::ios::binary); + if (!m_stream.is_open()) { + SIO_THROW(sio::error_code::not_open, "Couldn't open output stream '" + filename + "'"); + } + + auto& libLoader [[maybe_unused]] = SIOBlockLibraryLoader::instance(); + + sio::block_list blocks; + blocks.emplace_back(std::make_shared(podio::version::build_version)); + // write the version uncompressed + sio_utils::writeRecord(blocks, "podio_header_info", m_stream, sizeof(podio::version::Version), false); +} + +void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category) { + writeFrame(frame, category, frame.getAvailableCollections()); +} + +void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category, + const std::vector& collsToWrite) { + std::vector collections; + collections.reserve(collsToWrite.size()); + for (const auto& name : collsToWrite) { + collections.emplace_back(name, frame.getCollectionForWrite(name)); + } + + // Write necessary metadata and 
the actual data into two different records. + // Otherwise we cannot easily unpack the data record, because necessary + // information is contained within the record. + sio::block_list tableBlocks; + tableBlocks.emplace_back(sio_utils::createCollIDBlock(collections, frame.getCollectionIDTableForWrite())); + m_tocRecord.addRecord(category, sio_utils::writeRecord(tableBlocks, category + "_HEADER", m_stream)); + + const auto blocks = sio_utils::createBlocks(collections, frame.getGenericParametersForWrite()); + sio_utils::writeRecord(blocks, category, m_stream); +} + +void SIOFrameWriter::finish() { + sio::block_list blocks; + blocks.emplace_back(std::make_shared(&m_tocRecord)); + + auto tocStartPos = sio_utils::writeRecord(blocks, sio_helpers::SIOTocRecordName, m_stream); + + // Now that we know the position of the TOC Record, put this information + // into a final marker that can be identified and interpreted when reading + // again + uint64_t finalWords = (((uint64_t)sio_helpers::SIOTocMarker) << 32) | ((uint64_t)tocStartPos & 0xffffffff); + m_stream.write(reinterpret_cast(&finalWords), sizeof(finalWords)); + + m_stream.close(); +} + +} // namespace podio diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 39d43cb32..65430a3a2 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -25,10 +25,12 @@ CollectionBase* SIOReader::readCollection(const std::string& name) { readEvent(); } + // Have we unpacked this already? 
auto p = std::find_if(begin(m_inputs), end(m_inputs), [&name](const SIOReader::Input& t) { return t.second == name; }); if (p != end(m_inputs)) { + p->first->setID(m_table->collectionID(name)); p->first->prepareAfterRead(); return p->first; } @@ -87,8 +89,9 @@ void SIOReader::readEvent() { compressor.uncompress(m_rec_buffer.span(), m_unc_buffer); sio::api::read_blocks(m_unc_buffer.span(), m_blocks); - for (auto& [collection, name] : m_inputs) { - collection->setID(m_table->collectionID(name)); + for (size_t i = 1; i < m_blocks.size(); ++i) { + auto* blk = static_cast(m_blocks[i].get()); + m_inputs.emplace_back(blk->getCollection(), m_table->names()[i - 1]); } m_lastEventRead = m_eventNumber; @@ -132,7 +135,6 @@ void SIOReader::createBlocks() { const bool subsetColl = !m_subsetCollectionBits.empty() && m_subsetCollectionBits[i]; auto blk = podio::SIOBlockFactory::instance().createBlock(m_typeNames[i], m_table->names()[i], subsetColl); m_blocks.push_back(blk); - m_inputs.emplace_back(blk->getCollection(), m_table->names()[i]); } } diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index 3ac9d4258..782ddbd58 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -67,6 +67,7 @@ sio::block_list SIOWriter::createBlocks() const { for (const auto& name : m_collectionsToWrite) { const podio::CollectionBase* col{nullptr}; m_store->get(name, col); + col->prepareForWrite(); blocks.emplace_back(podio::SIOBlockFactory::instance().createBlock(col, name)); } diff --git a/src/rootUtils.h b/src/rootUtils.h index e8f87a5f9..5bce3d702 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -3,6 +3,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionBranches.h" +#include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" #include "TBranch.h" @@ -16,6 +17,42 @@ #include namespace podio::root_utils { +/** + * The name of the meta data tree in podio ROOT files. This tree mainly stores + * meta data that is necessary for ROOT based I/O. 
+ */ +constexpr static auto metaTreeName = "podio_metadata"; + +/** + * The name of the branch in the TTree for each frame for storing the + * GenericParameters + */ +constexpr static auto paramBranchName = "PARAMETERS"; + +/** + * The name of the branch into which we store the build version of podio at the + * time of writing the file + */ +constexpr static auto versionBranchName = "PodioBuildVersion"; + +/** + * Name of the branch for storing the idTable for a given category in the meta + * data tree + */ +inline std::string idTableName(const std::string& category) { + constexpr static auto suffix = "___idTable"; + return category + suffix; +} + +/** + * Name of the branch for storing the collection info for a given category in + * the meta data tree + */ +inline std::string collInfoName(const std::string& category) { + constexpr static auto suffix = "___CollectionTypeInfo"; + return category + suffix; +} + // Workaround slow branch retrieval for 6.22/06 performance degradation // see: https://root-forum.cern.ch/t/serious-degradation-of-i-o-performance-from-6-20-04-to-6-22-06/43584/10 template @@ -23,6 +60,11 @@ TBranch* getBranch(Tree* chain, const char* name) { return static_cast(chain->GetListOfBranches()->FindObject(name)); } +template +TBranch* getBranch(Tree* chain, const std::string& name) { + return getBranch(chain, name.c_str()); +} + inline std::string refBranch(const std::string& name, size_t index) { return name + "#" + std::to_string(index); } @@ -31,8 +73,8 @@ inline std::string vecBranch(const std::string& name, size_t index) { return name + "_" + std::to_string(index); } -inline void setCollectionAddresses(podio::CollectionBase* collection, const CollectionBranches& branches) { - const auto collBuffers = collection->getBuffers(); +template +inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionBranches& branches) { if (auto buffer = collBuffers.data) { branches.data->SetAddress(buffer); @@ -56,6 +98,19 @@ inline void 
setCollectionAddresses(podio::CollectionBase* collection, const Coll // collection using CollectionInfoT = std::tuple; +inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) { + // Read all data + if (branches.data) { + branches.data->GetEntry(entry); + } + for (auto* br : branches.refs) { + br->GetEntry(entry); + } + for (auto* br : branches.vecs) { + br->GetEntry(entry); + } +} + /** * reconstruct the collection info from information that is available from other * trees in the file. diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cde097bcc..ce31b0b19 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -35,7 +35,17 @@ function(CREATE_PODIO_TEST sourcefile additional_libs) ) endfunction() -set(root_dependent_tests write.cpp read.cpp read-multiple.cpp relation_range.cpp read_and_write.cpp read_and_write_associated.cpp write_timed.cpp read_timed.cpp) +set(root_dependent_tests + write.cpp + read.cpp + read-multiple.cpp + relation_range.cpp + read_and_write.cpp + read_and_write_associated.cpp + write_timed.cpp + read_timed.cpp + read_frame.cpp + write_frame_root.cpp) set(root_libs TestDataModelDict podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") @@ -111,7 +121,14 @@ endif() if (TARGET TestDataModelSioBlocks) - set(sio_dependent_tests write_sio.cpp read_sio.cpp read_and_write_sio.cpp write_timed_sio.cpp read_timed_sio.cpp) + set(sio_dependent_tests + write_sio.cpp + read_sio.cpp + read_and_write_sio.cpp + write_timed_sio.cpp + read_timed_sio.cpp + read_frame_sio.cpp + write_frame_sio.cpp) set(sio_libs podio::podioSioIO) foreach( sourcefile ${sio_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") @@ -129,6 +146,7 @@ set_property(TEST read PROPERTY DEPENDS write) set_property(TEST read-multiple PROPERTY DEPENDS write) set_property(TEST read_and_write PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) 
+set_property(TEST read_frame PROPERTY DEPENDS write_frame_root) add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) target_link_libraries(check_benchmark_outputs ROOT::Tree) @@ -140,6 +158,7 @@ if (TARGET read_sio) set_property(TEST read_sio PROPERTY DEPENDS write_sio) set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) + set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) @@ -159,7 +178,7 @@ set_property(TEST pyunittest PROPERTY DEPENDS write) configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake) find_package(Threads REQUIRED) -add_executable(unittest unittest.cpp) +add_executable(unittest unittest.cpp frame.cpp) target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads) # The unittests are a bit better and they are labelled so we can put together a @@ -184,6 +203,10 @@ if (USE_SANITIZER MATCHES "Memory(WithOrigin)?") # it fails to succesfully launch the executable and execute any test. 
Here # we just include them in order to have them show up as failing add_test(NAME unittest COMMAND unittest ${filter_tests}) + set_property(TEST unittest + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} + ) endif() else() include(Catch) @@ -191,6 +214,9 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT TEST_SPEC ${filter_tests} # discover only tests that are known to not fail + PROPERTIES + ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} ) endif() diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 79a9296d8..c0c3b04f5 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -21,12 +21,17 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read-multiple read-legacy-files + write_frame_root + read_frame + write_sio read_sio read_and_write_sio write_timed_sio read_timed_sio check_benchmark_outputs_sio + write_frame_sio + read_frame_sio write_ascii diff --git a/tests/frame.cpp b/tests/frame.cpp new file mode 100644 index 000000000..38dd4e3c6 --- /dev/null +++ b/tests/frame.cpp @@ -0,0 +1,361 @@ +#include "podio/Frame.h" + +#include "catch2/catch_test_macros.hpp" + +#include "datamodel/ExampleClusterCollection.h" +#include "datamodel/ExampleHitCollection.h" + +#include +#include +#include + +TEST_CASE("Frame collections", "[frame][basics]") { + auto event = podio::Frame(); + auto clusters = ExampleClusterCollection(); + clusters.create(3.14f); + clusters.create(42.0f); + + event.put(std::move(clusters), "clusters"); + + auto& coll = event.get("clusters"); + REQUIRE(coll[0].energy() == 3.14f); + REQUIRE(coll[1].energy() == 42.0f); +} + +TEST_CASE("Frame parameters", "[frame][basics]") { + auto event = podio::Frame(); + + event.putParameter("aString", "from a string literal"); + REQUIRE(event.getParameter("aString") 
== "from a string literal"); + + event.putParameter("someInts", {42, 123}); + const auto& ints = event.getParameter>("someInts"); + REQUIRE(ints.size() == 2); + REQUIRE(ints[0] == 42); + REQUIRE(ints[1] == 123); + + event.putParameter("someStrings", {"one", "two", "three"}); + const auto& strings = event.getParameter>("someStrings"); + REQUIRE(strings.size() == 3); + REQUIRE(strings[0] == "one"); + REQUIRE(strings[1] == "two"); + REQUIRE(strings[2] == "three"); +} + +// NOTE: Due to the extremly small tasks that are done in these tests, they will +// most likely succeed with a very high probability and only running with a +// ThreadSanitizer will detect race conditions, so make sure to have that +// enabled (-DUSE_SANITIZER=Thread) when working on these tests + +TEST_CASE("Frame collections multithreaded insert", "[frame][basics][multithread]") { + constexpr int nThreads = 10; + std::vector threads; + threads.reserve(10); + + auto frame = podio::Frame(); + + // Fill collections from different threads + for (int i = 0; i < nThreads; ++i) { + threads.emplace_back([&frame, i]() { + auto clusters = ExampleClusterCollection(); + clusters.create(i * 3.14); + clusters.create(i * 3.14); + frame.put(std::move(clusters), "clusters_" + std::to_string(i)); + + auto hits = ExampleHitCollection(); + hits.create(i * 100ULL); + hits.create(i * 100ULL); + hits.create(i * 100ULL); + frame.put(std::move(hits), "hits_" + std::to_string(i)); + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Check the frame contents after all threads have finished + for (int i = 0; i < nThreads; ++i) { + auto& hits = frame.get("hits_" + std::to_string(i)); + REQUIRE(hits.size() == 3); + for (const auto h : hits) { + REQUIRE(h.cellID() == i * 100ULL); + } + + auto& clusters = frame.get("clusters_" + std::to_string(i)); + REQUIRE(clusters.size() == 2); + for (const auto c : clusters) { + REQUIRE(c.energy() == i * 3.14); + } + } +} + +// Helper function to create a frame in the tests below 
+auto createFrame() { + auto frame = podio::Frame(); + + frame.put(ExampleClusterCollection(), "emptyClusters"); + + // Create a few hits inline (to avoid having to have two identifiers) + auto& hits = frame.put( + []() { + auto coll = ExampleHitCollection(); + auto hit = coll.create(0x42ULL, 0., 0., 0., 0.); + auto hit2 = coll.create(0x123ULL, 1., 1., 1., 1.); + return coll; + }(), + "hits"); + + auto clusters = ExampleClusterCollection(); + auto cluster = clusters.create(3.14f); + cluster.addHits(hits[0]); + auto cluster2 = clusters.create(42.0f); + cluster2.addHits(hits[1]); + cluster2.addClusters(cluster); + + // Create a few clustes inline and relate them to the hits from above + frame.put(std::move(clusters), "clusters"); + + frame.putParameter("anInt", 42); + frame.putParameter("someFloats", {1.23f, 2.34f, 3.45f}); + + return frame; +} + +// Helper function to get names easily below +std::string makeName(const std::string& prefix, int index) { + return prefix + "_" + std::to_string(index); +} + +// The Catch2 assertions are not threadsafe +// https://github.com/catchorg/Catch2/blob/devel/docs/limitations.md#thread-safe-assertions +// This is a poor-mans implementation where it is our responsibility to only +// pass in unshared counters +void CHECK_INCREASE(const bool condition, int& counter) { + if (condition) { + counter++; + } +} + +TEST_CASE("Frame collections multithreaded insert and read", "[frame][basics][multithread]") { + constexpr int nThreads = 10; + std::vector threads; + threads.reserve(10); + + // create a pre-populated frame + auto frame = createFrame(); + + // The Catch2 assertions are not threadsafe: + // https://github.com/catchorg/Catch2/blob/devel/docs/limitations.md#thread-safe-assertions + // Count the successes in this array here and check them outside + // Once the Catch2 deficiencies are resolved, this can be changed again + std::array successes{}; + + // Fill collections from different threads + for (int i = 0; i < nThreads; ++i) { + 
threads.emplace_back([&frame, i, &successes]() { + auto clusters = ExampleClusterCollection(); + clusters.create(i * 3.14); + clusters.create(i * 3.14); + frame.put(std::move(clusters), makeName("clusters", i)); + + // Retrieve a few collections in between and do iust a very basic testing + auto& existingClu = frame.get("clusters"); + CHECK_INCREASE(existingClu.size() == 2, successes[i]); + auto& existingHits = frame.get("hits"); + CHECK_INCREASE(existingHits.size() == 2, successes[i]); + + auto hits = ExampleHitCollection(); + hits.create(i * 100ULL); + hits.create(i * 100ULL); + hits.create(i * 100ULL); + frame.put(std::move(hits), makeName("hits", i)); + + // Fill in a lot of new collections to trigger a rehashing of the + // internal map, which invalidates iterators + constexpr int nColls = 100; + for (int k = 0; k < nColls; ++k) { + frame.put(ExampleHitCollection(), "h_" + std::to_string(i) + "_" + std::to_string(k)); + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + // Check the frame contents after all threads have finished + for (int i = 0; i < nThreads; ++i) { + // Check whether the insertsions are as expected + REQUIRE(successes[i] == 2); + + auto& hits = frame.get(makeName("hits", i)); + REQUIRE(hits.size() == 3); + for (const auto h : hits) { + REQUIRE(h.cellID() == i * 100ULL); + } + + auto& clusters = frame.get(makeName("clusters", i)); + REQUIRE(clusters.size() == 2); + for (const auto c : clusters) { + REQUIRE(c.energy() == i * 3.14); + } + } +} + +// Helper function to keep the tests below a bit easier to read and not having +// to repeat this bit all the time. 
This checks that the contents are the ones +// that would be expected from the createFrame above +void checkFrame(const podio::Frame& frame) { + auto& hits = frame.get("hits"); + REQUIRE(hits.size() == 2); + REQUIRE(hits[0].energy() == 0); + REQUIRE(hits[0].cellID() == 0x42ULL); + REQUIRE(hits[1].energy() == 1); + REQUIRE(hits[1].cellID() == 0x123ULL); + + REQUIRE(frame.get("emptyClusters").size() == 0); + + auto& clusters = frame.get("clusters"); + REQUIRE(clusters.size() == 2); + REQUIRE(clusters[0].energy() == 3.14f); + REQUIRE(clusters[0].Hits().size() == 1); + REQUIRE(clusters[0].Hits()[0] == hits[0]); + REQUIRE(clusters[0].Clusters().empty()); + + REQUIRE(clusters[1].energy() == 42.f); + REQUIRE(clusters[1].Hits().size() == 1); + REQUIRE(clusters[1].Hits()[0] == hits[1]); + REQUIRE(clusters[1].Clusters()[0] == clusters[0]); + + REQUIRE(frame.getParameter("anInt") == 42); + auto& floats = frame.getParameter>("someFloats"); + REQUIRE(floats.size() == 3); + REQUIRE(floats[0] == 1.23f); + REQUIRE(floats[1] == 2.34f); + REQUIRE(floats[2] == 3.45f); +} + +TEST_CASE("Frame movability", "[frame][move-semantics]") { + auto frame = createFrame(); + checkFrame(frame); // just to ensure that the setup is as expected + + SECTION("Move constructor") { + auto otherFrame = std::move(frame); + checkFrame(otherFrame); + } + + SECTION("Move assignment operator") { + auto otherFrame = podio::Frame(); + otherFrame = std::move(frame); + checkFrame(otherFrame); + } + + SECTION("Use after move construction") { + auto otherFrame = std::move(frame); // NOLINT(clang-analyzer-cplusplus.Move) clang-tidy and the Catch2 sections + // setup do not go along here + otherFrame.putParameter("aString", "Can add strings after move-constructing"); + REQUIRE(otherFrame.getParameter("aString") == "Can add strings after move-constructing"); + + otherFrame.put( + []() { + auto coll = ExampleHitCollection(); + coll.create(); + coll.create(); + coll.create(); + return coll; + }(), + "moreHits"); + + 
auto& hits = otherFrame.get("moreHits"); + REQUIRE(hits.size() == 3); + checkFrame(otherFrame); + } +} + +TEST_CASE("Frame parameters multithread insert", "[frame][basics][multithread]") { + // Test that parameter access is thread safe + constexpr int nThreads = 10; + std::vector threads; + threads.reserve(nThreads); + + auto frame = podio::Frame(); + + for (int i = 0; i < nThreads; ++i) { + threads.emplace_back([&frame, i]() { + frame.putParameter(makeName("int_par", i), i); + + frame.putParameter(makeName("float_par", i), (float)i); + + frame.putParameter(makeName("string_par", i), std::to_string(i)); + }); + } + + for (auto& t : threads) { + t.join(); + } + + for (int i = 0; i < nThreads; ++i) { + REQUIRE(frame.getParameter(makeName("int_par", i)) == i); + REQUIRE(frame.getParameter(makeName("float_par", i)) == (float)i); + REQUIRE(frame.getParameter(makeName("string_par", i)) == std::to_string(i)); + } +} + +TEST_CASE("Frame parameters multithread insert and read", "[frame][basics][multithread]") { + constexpr int nThreads = 10; + std::vector threads; + threads.reserve(nThreads); + + auto frame = podio::Frame(); + frame.putParameter("int_par", 42); + frame.putParameter("string_par", "some string"); + frame.putParameter("float_pars", {1.23f, 4.56f, 7.89f}); + + // The Catch2 assertions are not threadsafe: + // https://github.com/catchorg/Catch2/blob/devel/docs/limitations.md#thread-safe-assertions + // Count the successes in this array here and check them outside + // Once the Catch2 deficiencies are resolved, this can be changed again + std::array successes{}; + + for (int i = 0; i < nThreads; ++i) { + threads.emplace_back([&frame, i, &successes]() { + frame.putParameter(makeName("int", i), i); + frame.putParameter(makeName("float", i), (float)i); + + CHECK_INCREASE(frame.getParameter("int_par") == 42, successes[i]); + CHECK_INCREASE(frame.getParameter(makeName("float", i)) == (float)i, successes[i]); + + frame.putParameter(makeName("string", i), 
std::to_string(i)); + CHECK_INCREASE(frame.getParameter("string_par") == "some string", successes[i]); + + const auto& floatPars = frame.getParameter>("float_pars"); + CHECK_INCREASE(floatPars.size() == 3, successes[i]); + CHECK_INCREASE(floatPars[0] == 1.23f, successes[i]); + CHECK_INCREASE(floatPars[1] == 4.56f, successes[i]); + CHECK_INCREASE(floatPars[2] == 7.89f, successes[i]); + + // Fill in a lot of new parameters to trigger rehashing of the internal + // map, which invalidates iterators + constexpr int nParams = 100; + for (int k = 0; k < nParams; ++k) { + frame.putParameter(makeName("intPar", i) + std::to_string(k), i * k); + frame.putParameter(makeName("floatPar", i) + std::to_string(k), (float)i * k); + frame.putParameter(makeName("stringPar", i) + std::to_string(k), std::to_string(i * k)); + } + }); + } + + for (auto& t : threads) { + t.join(); + } + + for (int i = 0; i < nThreads; ++i) { + // Check the insertion successes + REQUIRE(successes[i] == 7); + + REQUIRE(frame.getParameter(makeName("int", i)) == i); + REQUIRE(frame.getParameter(makeName("float", i)) == (float)i); + REQUIRE(frame.getParameter(makeName("string", i)) == std::to_string(i)); + } +} diff --git a/tests/read_frame.cpp b/tests/read_frame.cpp new file mode 100644 index 000000000..5a0929627 --- /dev/null +++ b/tests/read_frame.cpp @@ -0,0 +1,7 @@ +#include "podio/ROOTFrameReader.h" + +#include "read_frame.h" + +int main() { + return read_frames("example_frame.root"); +} diff --git a/tests/read_frame.h b/tests/read_frame.h new file mode 100644 index 000000000..e9951c3b8 --- /dev/null +++ b/tests/read_frame.h @@ -0,0 +1,58 @@ +#ifndef PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "read_test.h" + +#include "podio/Frame.h" + +#include + +template +int read_frames(const std::string& filename) { + auto reader = ReaderT(); + reader.openFile(filename); 
+ + if (reader.currentFileVersion() != podio::version::build_version) { + std::cerr << "The podio build version could not be read back correctly. " + << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" + << std::endl; + return 1; + } + + if (reader.getEntries("events") != 10) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 10 << ", actual: " << reader.getEntries("events") << ")" << std::endl; + return 1; + } + + if (reader.getEntries("events") != reader.getEntries("other_events")) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 10 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; + return 1; + } + + // Read the frames in a different order than when writing them here to make + // sure that the writing/reading order does not impose any usage requirements + for (size_t i = 0; i < reader.getEntries("events"); ++i) { + auto frame = podio::Frame(reader.readNextEntry("events")); + processEvent(frame, i, reader.currentFileVersion()); + + auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); + processEvent(otherFrame, i + 100, reader.currentFileVersion()); + } + + if (reader.readNextEntry("events")) { + std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl; + return 1; + } + + std::cout << "========================================================\n" << std::endl; + if (reader.readNextEntry("not_present")) { + std::cerr << "Trying to read non-existant frame data should return a nullptr" << std::endl; + return 1; + } + + return 0; +} + +#endif // PODIO_TESTS_READ_FRAME_H diff --git a/tests/read_frame_sio.cpp b/tests/read_frame_sio.cpp new file mode 100644 index 000000000..cdc0b8854 --- /dev/null +++ b/tests/read_frame_sio.cpp @@ -0,0 +1,7 @@ +#include "podio/SIOFrameReader.h" + +#include "read_frame.h" + +int main() { + return 
read_frames("example_frame.sio"); +} diff --git a/tests/read_test.h b/tests/read_test.h index 9b12cff6a..0e54f0adf 100644 --- a/tests/read_test.h +++ b/tests/read_test.h @@ -26,6 +26,7 @@ #include #include #include +#include #include template @@ -38,25 +39,47 @@ bool check_fixed_width_value(FixedWidthT actual, FixedWidthT expected, const std return true; } -void processEvent(podio::EventStore& store, int eventNum, podio::version::Version fileVersion) { +template +static constexpr bool isEventStore = std::is_same_v; - const auto& evtMD = store.getEventMetaData(); - auto evtWeight = evtMD.getValue("UserEventWeight"); +template +void processEvent(StoreT& store, int eventNum, podio::version::Version fileVersion) { + + float evtWeight = -1; + if constexpr (isEventStore) { + const auto& evtMD = store.getEventMetaData(); + evtWeight = evtMD.template getValue("UserEventWeight"); + } else { + evtWeight = store.template getParameter("UserEventWeight"); + } if (evtWeight != (float)100. * eventNum) { std::cout << " read UserEventWeight: " << evtWeight << " - expected : " << (float)100. 
* eventNum << std::endl; throw std::runtime_error("Couldn't read event meta data parameters 'UserEventWeight'"); } + std::stringstream ss; ss << " event_number_" << eventNum; - const auto& evtMD2 = store.getEventMetaData(); - const auto& evtName = evtMD2.getValue("UserEventName"); + std::string evtName = ""; + if constexpr (isEventStore) { + const auto& evtMD = store.getEventMetaData(); + evtName = evtMD.template getValue("UserEventName"); + } else { + evtName = store.template getParameter("UserEventName"); + } + if (evtName != ss.str()) { std::cout << " read UserEventName: " << evtName << " - expected : " << ss.str() << std::endl; throw std::runtime_error("Couldn't read event meta data parameters 'UserEventName'"); } if (fileVersion > podio::version::Version{0, 14, 1}) { - const auto& someVectorData = evtMD.getValue>("SomeVectorData"); + std::vector someVectorData{}; + if constexpr (isEventStore) { + const auto& evtMD = store.getEventMetaData(); + someVectorData = evtMD.template getValue>("SomeVectorData"); + } else { + someVectorData = store.template getParameter>("SomeVectorData"); + } if (someVectorData.size() != 4) { throw std::runtime_error("Couldn't read event meta data parameters: 'SomeVectorData'"); } @@ -70,7 +93,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio try { // not assigning to a variable, because it will remain unused, we just want // the exception here - store.get("notthere"); + store.template get("notthere"); } catch (const std::runtime_error& err) { if (std::string(err.what()) != "No collection \'notthere\' is present in the EventStore") { throw std::runtime_error("Trying to get non present collection \'notthere' should throw an exception"); @@ -78,17 +101,23 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } // read collection meta data - auto& hits = store.get("hits"); - auto colMD = store.getCollectionMetaData(hits.getID()); - const auto& es = 
colMD.getValue("CellIDEncodingString"); - if (es != std::string("system:8,barrel:3,layer:6,slice:5,x:-16,y:-16")) { - std::cout << " meta data from collection 'hits' with id = " << hits.getID() << " read CellIDEncodingString: " << es - << " - expected : system:8,barrel:3,layer:6,slice:5,x:-16,y:-16" << std::endl; - throw std::runtime_error("Couldn't read event meta data parameters 'CellIDEncodingString'"); + auto& hits = store.template get("hits"); + if constexpr (isEventStore) { + const auto& colMD = store.getCollectionMetaData(hits.getID()); + const auto& es = colMD.template getValue("CellIDEncodingString"); + if (es != std::string("system:8,barrel:3,layer:6,slice:5,x:-16,y:-16")) { + std::cout << " meta data from collection 'hits' with id = " << hits.getID() + << " read CellIDEncodingString: " << es << " - expected : system:8,barrel:3,layer:6,slice:5,x:-16,y:-16" + << std::endl; + throw std::runtime_error("Couldn't read event meta data parameters 'CellIDEncodingString'"); + } + + } else { + // TODO: Integrate this into the frame workflow somehow } if (fileVersion > podio::version::Version{0, 14, 0}) { - auto& hitRefs = store.get("hitRefs"); + auto& hitRefs = store.template get("hitRefs"); if (hitRefs.size() != hits.size()) { throw std::runtime_error("hit and subset hit collection do not have the same size"); } @@ -97,7 +126,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } } - auto& clusters = store.get("clusters"); + auto& clusters = store.template get("clusters"); if (clusters.isValid()) { auto cluster = clusters[0]; for (auto i = cluster.Hits_begin(), end = cluster.Hits_end(); i != end; ++i) { @@ -107,7 +136,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio throw std::runtime_error("Collection 'clusters' should be present"); } - auto& mcps = store.get("mcparticles"); + auto& mcps = store.template get("mcparticles"); if (!mcps.isValid()) { throw std::runtime_error("Collection 
'mcparticles' should be present"); } @@ -175,7 +204,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio // Load the subset collection first to ensure that it pulls in objects taht // have not been read yet - auto& mcpRefs = store.get("mcParticleRefs"); + auto& mcpRefs = store.template get("mcParticleRefs"); if (!mcpRefs.isValid()) { throw std::runtime_error("Collection 'mcParticleRefs' should be present"); } @@ -187,7 +216,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } } - auto& moreMCs = store.get("moreMCs"); + auto& moreMCs = store.template get("moreMCs"); // First check that the two mc collections that we store are the same if (mcps.size() != moreMCs.size()) { @@ -220,11 +249,11 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } // std::cout << "Fetching collection 'refs'" << std::endl; - auto& refs = store.get("refs"); + auto& refs = store.template get("refs"); if (refs.isValid()) { auto ref = refs[0]; for (auto cluster : ref.Clusters()) { - for (auto hit : cluster.Hits()) { + for (auto hit [[maybe_unused]] : cluster.Hits()) { // std::cout << " Referenced object has an energy of " << hit.energy() << std::endl; } } @@ -232,7 +261,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio throw std::runtime_error("Collection 'refs' should be present"); } // std::cout << "Fetching collection 'OneRelation'" << std::endl; - auto& rels = store.get("OneRelation"); + auto& rels = store.template get("OneRelation"); if (rels.isValid()) { // std::cout << "Referenced object has an energy of " << (*rels)[0].cluster().energy() << std::endl; } else { @@ -240,7 +269,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } // std::cout << "Fetching collection 'WithVectorMember'" << std::endl; - auto& vecs = store.get("WithVectorMember"); + auto& vecs = store.template get("WithVectorMember"); if (vecs.isValid()) { 
if (vecs.size() != 2) { throw std::runtime_error("Collection 'WithVectorMember' should have two elements'"); @@ -267,13 +296,13 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio throw std::runtime_error("Collection 'WithVectorMember' should be present"); } - auto& comps = store.get("Component"); + auto& comps = store.template get("Component"); if (comps.isValid()) { auto comp = comps[0]; int a [[maybe_unused]] = comp.component().data.x + comp.component().data.z; } - auto& arrays = store.get("arrays"); + auto& arrays = store.template get("arrays"); if (arrays.isValid() && arrays.size() != 0) { auto array = arrays[0]; if (array.myArray(1) != eventNum) { @@ -289,9 +318,10 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio throw std::runtime_error("Collection 'arrays' should be present"); } - auto& nmspaces = store.get("WithNamespaceRelation"); - auto& copies = store.get("WithNamespaceRelationCopy"); - auto& cpytest = store.create("TestConstCopy"); + auto& nmspaces = store.template get("WithNamespaceRelation"); + auto& copies = store.template get("WithNamespaceRelationCopy"); + + auto cpytest = ex42::ExampleWithARelationCollection{}; if (nmspaces.isValid() && copies.isValid()) { for (size_t j = 0; j < nmspaces.size(); j++) { auto nmsp = nmspaces[j]; @@ -328,7 +358,7 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } if (fileVersion >= podio::version::Version{0, 13, 1}) { - const auto& fixedWidthInts = store.get("fixedWidthInts"); + const auto& fixedWidthInts = store.template get("fixedWidthInts"); if (not fixedWidthInts.isValid() or fixedWidthInts.size() != 3) { throw std::runtime_error("Collection \'fixedWidthInts\' should be present and have 3 elements"); } @@ -362,7 +392,12 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } if (fileVersion >= podio::version::Version{0, 13, 2}) { - auto& usrInts = store.get>("userInts"); + auto& 
usrInts = store.template get>("userInts"); + + if (usrInts.size() != (unsigned)eventNum + 1) { + throw std::runtime_error("Could not read all userInts properly (expected: " + std::to_string(eventNum + 1) + + ", actual: " + std::to_string(usrInts.size()) + ")"); + } auto& uivec = usrInts.vec(); int myInt = 0; @@ -379,7 +414,12 @@ void processEvent(podio::EventStore& store, int eventNum, podio::version::Versio } } - auto& usrDbl = store.get>("userDoubles"); + auto& usrDbl = store.template get>("userDoubles"); + if (usrDbl.size() != 100) { + throw std::runtime_error( + "Could not read all userDoubles properly (expected: 100, actual: " + std::to_string(usrDbl.size()) + ")"); + } + for (double d : usrDbl) { if (d != 42.) { throw std::runtime_error("Couldn't read userDoubles properly"); diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 325bb81be..443584b42 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -614,6 +614,17 @@ auto createCollections(const size_t nElements = 3u) { userDataColl.push_back(3.14f * i); } + vecMemColl.prepareForWrite(); + auto buffers = vecMemColl.getBuffers(); + auto vecBuffers = buffers.vectorMembers; + auto thisVec = (*vecBuffers)[0].second; + + // const auto floatVec = podio::CollectionWriteBuffers::asVector(thisVec); + const auto floatVec2 = podio::CollectionReadBuffers::asVector(thisVec); + + // std::cout << floatVec->size() << '\n'; + std::cout << "** " << floatVec2->size() << " vs " << vecMemColl.size() << '\n'; + return colls; } @@ -689,6 +700,9 @@ void checkCollections(/*const*/ ExampleHitCollection& hits, /*const*/ ExampleClu } } +template +struct TD; + TEST_CASE("Move-only collections", "[collections][move-semantics]") { // Setup a few collections that will be used throughout below auto [hitColl, clusterColl, vecMemColl, userDataColl] = createCollections(); @@ -731,6 +745,20 @@ TEST_CASE("Move-only collections", "[collections][move-semantics]") { auto newClusters = std::move(clusterColl); 
vecMemColl.prepareForWrite(); + auto buffers = vecMemColl.getBuffers(); + auto vecBuffers = buffers.vectorMembers; + auto thisVec = (*vecBuffers)[0].second; + + const auto floatVec = podio::CollectionWriteBuffers::asVector(thisVec); + const auto floatVec2 = podio::CollectionReadBuffers::asVector(thisVec); + + std::cout << floatVec->size() << '\n'; + std::cout << floatVec2->size() << '\n'; + + // auto vecBuffers = buffers.vectorMembers; + // const auto vecBuffer = podio::CollectionWriteBuffers::asVector((*vecBuffers)[0].second); + // TD td; + // REQUIRE(vecBuffer->size() == 2); auto newVecMems = std::move(vecMemColl); userDataColl.prepareForWrite(); @@ -739,6 +767,9 @@ TEST_CASE("Move-only collections", "[collections][move-semantics]") { checkCollections(newHits, newClusters, newVecMems, newUserData); } + SECTION("Moved collections can be prepared") { + } + SECTION("Prepared collections can be move assigned") { hitColl.prepareForWrite(); clusterColl.prepareForWrite(); diff --git a/tests/write_frame.h b/tests/write_frame.h new file mode 100644 index 000000000..f7ccdc267 --- /dev/null +++ b/tests/write_frame.h @@ -0,0 +1,390 @@ +#ifndef PODIO_TESTS_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/EventInfoCollection.h" +#include "datamodel/ExampleClusterCollection.h" +#include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleMCCollection.h" +#include "datamodel/ExampleReferencingTypeCollection.h" +#include "datamodel/ExampleWithARelationCollection.h" +#include "datamodel/ExampleWithArrayCollection.h" +#include "datamodel/ExampleWithFixedWidthIntegersCollection.h" +#include "datamodel/ExampleWithNamespaceCollection.h" +#include "datamodel/ExampleWithOneRelationCollection.h" +#include "datamodel/ExampleWithVectorMemberCollection.h" + +#include "podio/Frame.h" +#include "podio/UserDataCollection.h" + +#include 
+#include +#include + +static const std::vector collsToWrite = {"mcparticles", + "moreMCs", + "arrays", + "mcParticleRefs", + "hits", + "hitRefs", + "refs", + "refs2", + "clusters", + "OneRelation", + "info", + "WithVectorMember", + "fixedWidthInts", + "userInts", + "userDoubles", + "WithNamespaceMember", + "WithNamespaceRelation", + "WithNamespaceRelationCopy"}; + +auto createMCCollection() { + auto mcps = ExampleMCCollection(); + + // ---- add some MC particles ---- + auto mcp0 = mcps.create(); + auto mcp1 = mcps.create(); + auto mcp2 = mcps.create(); + auto mcp3 = mcps.create(); + auto mcp4 = mcps.create(); + auto mcp5 = mcps.create(); + auto mcp6 = mcps.create(); + auto mcp7 = mcps.create(); + auto mcp8 = mcps.create(); + auto mcp9 = mcps.create(); + + auto mcp = mcps[0]; + mcp.adddaughters(mcps[2]); + mcp.adddaughters(mcps[3]); + mcp.adddaughters(mcps[4]); + mcp.adddaughters(mcps[5]); + mcp = mcps[1]; + mcp.adddaughters(mcps[2]); + mcp.adddaughters(mcps[3]); + mcp.adddaughters(mcps[4]); + mcp.adddaughters(mcps[5]); + mcp = mcps[2]; + mcp.adddaughters(mcps[6]); + mcp.adddaughters(mcps[7]); + mcp.adddaughters(mcps[8]); + mcp.adddaughters(mcps[9]); + mcp = mcps[3]; + mcp.adddaughters(mcps[6]); + mcp.adddaughters(mcps[7]); + mcp.adddaughters(mcps[8]); + mcp.adddaughters(mcps[9]); + + //--- now fix the parent relations + // use a range-based for loop here to see if we get mutable objects from the + // begin/end iterators + for (auto mc : mcps) { + for (auto p : mc.daughters()) { + int dIndex = p.getObjectID().index; + auto d = mcps[dIndex]; + d.addparents(p); + } + } + + return mcps; +} + +auto createArrayCollection(int i) { + auto arrays = ExampleWithArrayCollection(); + + std::array arrayTest = {0, 0, 2, 3}; + std::array arrayTest2 = {4, 4, 2 * static_cast(i)}; + NotSoSimpleStruct a; + a.data.p = arrayTest2; + ex2::NamespaceStruct nstruct; + nstruct.x = static_cast(i); + std::array structArrayTest = {nstruct, nstruct, nstruct, nstruct}; + auto array = 
MutableExampleWithArray(a, arrayTest, arrayTest, arrayTest, arrayTest, structArrayTest); + array.myArray(1, i); + array.arrayStruct(a); + arrays.push_back(array); + + return arrays; +} + +auto createMCRefCollection(const ExampleMCCollection& mcps, const ExampleMCCollection& moreMCs) { + auto mcpsRefs = ExampleMCCollection(); + mcpsRefs.setSubsetCollection(); + // ----------------- add all "odd" mc particles into a subset collection + for (auto p : mcps) { + if (p.id() % 2) { + mcpsRefs.push_back(p); + } + } + // ----------------- add the "even" counterparts from a different collection + for (auto p : moreMCs) { + if (p.id() % 2 == 0) { + mcpsRefs.push_back(p); + } + } + + if (mcpsRefs.size() != mcps.size()) { + throw std::runtime_error( + "The mcParticleRefs collection should now contain as many elements as the mcparticles collection"); + } + + return mcpsRefs; +} + +auto createHitCollection(int i) { + ExampleHitCollection hits; + + auto hit1 = ExampleHit(0xbad, 0., 0., 0., 23. + i); + auto hit2 = ExampleHit(0xcaffee, 1., 0., 0., 12. 
+ i); + + hits.push_back(hit1); + hits.push_back(hit2); + + return hits; +} + +auto createHitRefCollection(const ExampleHitCollection& hits) { + ExampleHitCollection hitRefs; + hitRefs.setSubsetCollection(); + + hitRefs.push_back(hits[1]); + hitRefs.push_back(hits[0]); + + return hitRefs; +} + +auto createClusterCollection(const ExampleHitCollection& hits) { + ExampleClusterCollection clusters; + + auto cluster = MutableExampleCluster(); + auto clu0 = MutableExampleCluster(); + auto clu1 = MutableExampleCluster(); + + auto hit1 = hits[0]; + auto hit2 = hits[1]; + + clu0.addHits(hit1); + clu0.energy(hit1.energy()); + clu1.addHits(hit2); + clu1.energy(hit2.energy()); + cluster.addHits(hit1); + cluster.addHits(hit2); + cluster.energy(hit1.energy() + hit2.energy()); + cluster.addClusters(clu0); + cluster.addClusters(clu1); + + clusters.push_back(clu0); + clusters.push_back(clu1); + clusters.push_back(cluster); + + return clusters; +} + +auto createReferencingCollections(const ExampleClusterCollection& clusters) { + auto retType = std::tuple(); + auto& [refs, refs2] = retType; + + auto ref = MutableExampleReferencingType(); + refs.push_back(ref); + + auto ref2 = ExampleReferencingType(); + refs2.push_back(ref2); + + ref.addClusters(clusters[2]); + ref.addRefs(ref2); + + auto cyclic = MutableExampleReferencingType(); + cyclic.addRefs(cyclic); + refs.push_back(cyclic); + + return retType; +} + +auto createOneRelCollection(const ExampleClusterCollection& clusters) { + ExampleWithOneRelationCollection oneRels; + + auto oneRel = MutableExampleWithOneRelation(); + oneRel.cluster(clusters[2]); + oneRels.push_back(oneRel); + + // write non-filled relation + auto oneRelEmpty = ExampleWithOneRelation(); + oneRels.push_back(oneRelEmpty); + + return oneRels; +} + +auto createVectorMemberCollection(int i) { + ExampleWithVectorMemberCollection vecs; + + auto vec = MutableExampleWithVectorMember(); + vec.addcount(i); + vec.addcount(i + 10); + vecs.push_back(vec); + auto vec1 = 
MutableExampleWithVectorMember(); + vec1.addcount(i + 1); + vec1.addcount(i + 11); + vecs.push_back(vec1); + + return vecs; +} + +auto createInfoCollection(int i) { + EventInfoCollection info; + + auto item1 = MutableEventInfo(); + item1.Number(i); + info.push_back(item1); + + return info; +} + +auto createFixedWidthCollection() { + auto fixedWidthInts = ExampleWithFixedWidthIntegersCollection(); + + auto maxValues = fixedWidthInts.create(); + maxValues.fixedI16(std::numeric_limits::max()); // 2^(16 - 1) - 1 == 32767 + maxValues.fixedU32(std::numeric_limits::max()); // 2^32 - 1 == 4294967295 + maxValues.fixedU64(std::numeric_limits::max()); // 2^64 - 1 == 18446744073709551615 + auto& maxComp = maxValues.fixedWidthStruct(); + maxComp.fixedUnsigned16 = std::numeric_limits::max(); // 2^16 - 1 == 65535 + maxComp.fixedInteger64 = std::numeric_limits::max(); // 2^(64 -1) - 1 == 9223372036854775807 + maxComp.fixedInteger32 = std::numeric_limits::max(); // 2^(32 - 1) - 1 == 2147483647 + + auto minValues = fixedWidthInts.create(); + minValues.fixedI16(std::numeric_limits::min()); // -2^(16 - 1) == -32768 + minValues.fixedU32(std::numeric_limits::min()); // 0 + minValues.fixedU64(std::numeric_limits::min()); // 0 + auto& minComp = minValues.fixedWidthStruct(); + minComp.fixedUnsigned16 = std::numeric_limits::min(); // 0 + minComp.fixedInteger64 = std::numeric_limits::min(); // -2^(64 - 1) == -9223372036854775808 + minComp.fixedInteger32 = std::numeric_limits::min(); // -2^(32 - 1) == -2147483648 + + auto arbValues = fixedWidthInts.create(); + arbValues.fixedI16(-12345); + arbValues.fixedU32(1234567890); + arbValues.fixedU64(1234567890123456789); + auto& arbComp = arbValues.fixedWidthStruct(); + arbComp.fixedUnsigned16 = 12345; + arbComp.fixedInteger32 = -1234567890; + arbComp.fixedInteger64 = -1234567890123456789ll; + + return fixedWidthInts; +} + +auto createUserDataCollections(int i) { + auto retType = std::tuple, podio::UserDataCollection>(); + auto& [usrInts, usrDoubles] 
= retType; + + // add some plain ints as user data + usrInts.resize(i + 1); + int myInt = 0; + for (auto& iu : usrInts) { + iu = myInt++; + } + + // and some user double values + unsigned nd = 100; + usrDoubles.resize(nd); + for (unsigned id = 0; id < nd; ++id) { + usrDoubles[id] = 42.; + } + + return retType; +} + +auto createNamespaceRelationCollection(int i) { + auto retVal = std::tuple{}; + auto& [namesps, namesprels, cpytest] = retVal; + + for (int j = 0; j < 5; j++) { + auto rel = ex42::MutableExampleWithARelation(); + rel.number(0.5 * j); + auto exWithNamesp = ex42::MutableExampleWithNamespace(); + exWithNamesp.component().x = i; + exWithNamesp.component().y = 1000 * i; + namesps.push_back(exWithNamesp); + if (j != 3) { // also check for empty relations + rel.ref(exWithNamesp); + for (int k = 0; k < 5; k++) { + auto namesp = ex42::MutableExampleWithNamespace(); + namesp.x(3 * k); + namesp.component().y = k; + namesps.push_back(namesp); + rel.addrefs(namesp); + } + } + namesprels.push_back(rel); + } + for (auto&& namesprel : namesprels) { + cpytest.push_back(namesprel.clone()); + } + + return retVal; +} + +podio::Frame makeFrame(int iFrame) { + podio::Frame frame{}; + + frame.put(createArrayCollection(iFrame), "arrays"); + frame.put(createVectorMemberCollection(iFrame), "WithVectorMember"); + frame.put(createInfoCollection(iFrame), "info"); + frame.put(createFixedWidthCollection(), "fixedWidthInts"); + + auto& mcps = frame.put(createMCCollection(), "mcparticles"); + + ExampleMCCollection moreMCs{}; + for (const auto&& mc : mcps) { + moreMCs.push_back(mc.clone()); + } + auto& otherMCs = frame.put(std::move(moreMCs), "moreMCs"); + frame.put(createMCRefCollection(mcps, otherMCs), "mcParticleRefs"); + + const auto& hits = frame.put(createHitCollection(iFrame), "hits"); + frame.put(createHitRefCollection(hits), "hitRefs"); + + const auto& clusters = frame.put(createClusterCollection(hits), "clusters"); + + auto [refs, refs2] = 
createReferencingCollections(clusters); + frame.put(std::move(refs), "refs"); + frame.put(std::move(refs2), "refs2"); + + frame.put(createOneRelCollection(clusters), "OneRelation"); + + auto [usrInts, usrDoubles] = createUserDataCollections(iFrame); + frame.put(std::move(usrInts), "userInts"); + frame.put(std::move(usrDoubles), "userDoubles"); + + auto [namesps, namespsrels, cpytest] = createNamespaceRelationCollection(iFrame); + frame.put(std::move(namesps), "WithNamespaceMember"); + frame.put(std::move(namespsrels), "WithNamespaceRelation"); + frame.put(std::move(cpytest), "WithNamespaceRelationCopy"); + + // Parameters + frame.putParameter("anInt", 42 + iFrame); + frame.putParameter("UserEventWeight", 100.f * iFrame); + frame.putParameter("UserEventName", " event_number_" + std::to_string(iFrame)); + frame.putParameter("SomeVectorData", {1, 2, 3, 4}); + + return frame; +} + +template +void write_frames(const std::string& filename) { + WriterT writer(filename); + + for (int i = 0; i < 10; ++i) { + auto frame = makeFrame(i); + writer.writeFrame(frame, "events", collsToWrite); + } + + for (int i = 100; i < 110; ++i) { + auto frame = makeFrame(i); + writer.writeFrame(frame, "other_events"); + } + + writer.finish(); +} + +#endif // PODIO_TESTS_WRITE_FRAME_H diff --git a/tests/write_frame_root.cpp b/tests/write_frame_root.cpp new file mode 100644 index 000000000..fd1d89beb --- /dev/null +++ b/tests/write_frame_root.cpp @@ -0,0 +1,8 @@ +#include "write_frame.h" + +#include "podio/ROOTFrameWriter.h" + +int main(int, char**) { + write_frames("example_frame.root"); + return 0; +} diff --git a/tests/write_frame_sio.cpp b/tests/write_frame_sio.cpp new file mode 100644 index 000000000..31df08171 --- /dev/null +++ b/tests/write_frame_sio.cpp @@ -0,0 +1,8 @@ +#include "write_frame.h" + +#include "podio/SIOFrameWriter.h" + +int main(int, char**) { + write_frames("example_frame.sio"); + return 0; +} From 89be4a002d641515ef51f6b54716d667a564ca27 Mon Sep 17 00:00:00 2001 From: 
Thomas Madlener Date: Tue, 27 Sep 2022 10:53:57 +0200 Subject: [PATCH 003/100] Fix ROOT warnings for non-streamable mutexes (#334) --- src/selection.xml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/selection.xml b/src/selection.xml index 1777b2d3d..d2c0d9adb 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -1,6 +1,10 @@ - + + + + + From 78cd3a0d60ad0bec5b470b6fb1c4832a0b6e7baa Mon Sep 17 00:00:00 2001 From: Andre Sailer Date: Tue, 27 Sep 2022 14:10:07 +0200 Subject: [PATCH 004/100] Cmake tweaks (#336) * PodioConfig: set ENABLE_SIO variable to avoid warning about cmake policy CMP0012 in downstream packages * CMake: Explicitly require version 3 (or newer) for Catch2 --- cmake/podioConfig.cmake.in | 3 ++- tests/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/podioConfig.cmake.in b/cmake/podioConfig.cmake.in index 6ae9a41cc..d3d1a8ed0 100644 --- a/cmake/podioConfig.cmake.in +++ b/cmake/podioConfig.cmake.in @@ -31,7 +31,8 @@ else() find_dependency(Python COMPONENTS Interpreter) endif() -if(@ENABLE_SIO@) +SET(ENABLE_SIO @ENABLE_SIO@) +if(ENABLE_SIO) find_dependency(SIO) # Targets from SIO only become available with v00-01 so we rig them here to be # able to use them diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ce31b0b19..b67d80585 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -90,7 +90,7 @@ CREATE_PODIO_TEST(ostream_operator.cpp "") CREATE_PODIO_TEST(write_ascii.cpp "") if(USE_EXTERNAL_CATCH2) - find_package(Catch2 REQUIRED) + find_package(Catch2 3 REQUIRED) else() message(STATUS "Fetching local copy of Catch2 library for unit-tests...") # Build Catch2 with the default flags, to avoid generating warnings when we From 7218c6f64398fb6b8e5c22e6510c93c76887063f Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 4 Oct 2022 15:44:33 +0200 Subject: [PATCH 005/100] Make the notebook pattern use vectors instead of arrays (#337) --- doc/examples.md | 10 
++++++---- python/templates/Collection.cc.jinja2 | 4 ++++ python/templates/Collection.h.jinja2 | 7 +------ python/templates/macros/collections.jinja2 | 12 ++++++------ tests/unittest.cpp | 15 +++++++++------ 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/doc/examples.md b/doc/examples.md index 86a93262f..6e08dbe0e 100644 --- a/doc/examples.md +++ b/doc/examples.md @@ -88,12 +88,14 @@ copy of only the data that are needed for a particular calculation. This pattern is supported by providing access like ```cpp - auto x_array = hits.x<10>(); // returning values of - auto y_array = hits.y<10>(); // the first 10 elements + auto x_array = hits.x(); // returning all values + auto y_array = hits.y(10); // or only the first 10 elements ``` -The resulting `std::array` can then be used in (auto-)vectorizable code. -If less objects than requested are contained in the collection, the remaining entries are default initialized. +The resulting `std::vector` can then be used in (auto-)vectorizable code. +Passing in a size argument is optional; if no argument is passed all elements will be returned, +if an argument is passed only as many elements as requested will be returned. +If the collection holds fewer elements than are requested, only as many elements as are available will be returned. 
### EventStore functionality diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index bf7fd9e0d..06069621b 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -174,6 +174,10 @@ podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { return readBuffers; } +{% for member in Members %} +{{ macros.vectorized_access(class, member) }} +{% endfor %} + #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { j = nlohmann::json::array(); diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index b6f444e49..039fe0d6a 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -145,8 +145,7 @@ public: } {% for member in Members %} - template - const std::array<{{ member.full_type }}, arraysize> {{ member.name }}() const; + std::vector<{{ member.full_type }}> {{ member.name }}(const size_t nElem = 0) const; {% endfor %} private: @@ -184,10 +183,6 @@ Mutable{{ class.bare_type }} {{ class.bare_type }}Collection::create(Args&&... 
a return Mutable{{ class.bare_type }}(obj); } -{% for member in Members %} -{{ macros.vectorized_access(class, member) }} -{% endfor %} - #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ class.bare_type }}Collection& collection); #endif diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index bfe2e49a1..f91dfbedf 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -1,10 +1,10 @@ {% macro vectorized_access(class, member) %} -template -const std::array<{{ member.full_type }}, arraysize> {{ class.bare_type }}Collection::{{ member.name }}() const { - std::array<{{ member.full_type }}, arraysize> tmp{}; - const auto valid_size = std::min(arraysize, m_storage.entries.size()); - for (unsigned i = 0; i < valid_size; ++i) { - tmp[i] = m_storage.entries[i]->data.{{ member.name }}; +std::vector<{{ member.full_type }}> {{ class.bare_type }}Collection::{{ member.name }}(const size_t nElem) const { + std::vector<{{ member.full_type }}> tmp; + const auto valid_size = nElem != 0 ? 
std::min(nElem, m_storage.entries.size()) : m_storage.entries.size(); + tmp.reserve(valid_size); + for (size_t i = 0; i < valid_size; ++i) { + tmp.emplace_back(m_storage.entries[i]->data.{{ member.name }}); } return tmp; } diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 443584b42..17b39f06b 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -177,21 +177,24 @@ TEST_CASE("Looping", "[basics]") { } TEST_CASE("Notebook", "[basics]") { - bool success = true; auto store = podio::EventStore(); auto& hits = store.create("hits"); for (unsigned i = 0; i < 12; ++i) { auto hit = hits.create(0xcaffeeULL, 0., 0., 0., double(i)); } - auto energies = hits.energy<10>(); + + // Request only subset + auto energies = hits.energy(10); + REQUIRE(energies.size() == 10); int index = 0; for (auto energy : energies) { - if (double(index) != energy) { - success = false; - } + REQUIRE(double(index) == energy); ++index; } - REQUIRE(success); + + // Make sure there are no "invented" values + REQUIRE(hits.energy(24).size() == hits.size()); + REQUIRE(hits.energy().size() == hits.size()); } TEST_CASE("OneToOneRelations", "[basics][relations]") { From 1ad911d07815f66ac4f4ae57f8cde3e4910d6975 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 6 Oct 2022 14:05:29 +0200 Subject: [PATCH 006/100] Release Notes for v00-16 --- doc/ReleaseNotes.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index 6ef46c40d..594371561 100644 --- a/doc/ReleaseNotes.md +++ b/doc/ReleaseNotes.md @@ -1,3 +1,31 @@ +# v00-16 + +* 2022-10-04 Thomas Madlener ([PR#337](https://github.com/AIDASoft/podio/pull/337)) + - Make the notebook pattern functionality return `std::vector`s instead of `std::array` to avoid having to specify a static size. Fixes #332 + - **Backwards incompatible change** as the return type as well as the call signature for the notebook pattern change. 
+ +* 2022-09-27 Andre Sailer ([PR#336](https://github.com/AIDASoft/podio/pull/336)) + - podioConfig.cmake: silence warning about cmake policy CMP0012 + - CMake: explicitly look for catch2 version 3 and fail at cmake instead of compile step + +* 2022-09-27 Thomas Madlener ([PR#334](https://github.com/AIDASoft/podio/pull/334)) + - Fix a warning/error message from ROOT from attempts to stream the `std::mutex` members of `GenericParameters` by marking them as transient for the dictionary generation. + +* 2022-09-16 Thomas Madlener ([PR#323](https://github.com/AIDASoft/podio/pull/323)) + - Add a `podio-dump` python script (installed to `/bin`) that can be used to dump event contents to stdout. By default prints an overview over the collections and their types, but can also be used to dump full events, via the `-d` or `--detailed` flag. Use `--help` to get all available options and their descriptions. + - To allow `podio-dump` to work with all available backends also add support for reading SIO via the `PythonEventStore`. + - Split off the necessary c++ functionality into a separate `podioPythonStore` library (+ necessary ROOT dictionaries). + - Add a `print` function to the collections for easier dumping from the python side. + - Add a `print` function to the `GenericParameters` + - Make `goToEvent` a part of the `IReader` interface and correctly implement it for the `SIOReader`. + +* 2022-09-16 Thomas Madlener ([PR#287](https://github.com/AIDASoft/podio/pull/287)) + - Introduce the `podio::Frame` as a generalized, thread-safe (event) data container. + - This first version offers all necessary functionality and an almost finalized interface, i.e. 
we plan to keep this as stable as possible, but we might still change things if it turns out that there are better ways to do some things + - For details about the basic interface and the underlying design considerations please consult the corresponding [documentation](https://github.com/AIDASoft/podio/doc/frame.md) + - **This will be the only way to work with podio data starting from version 1.0** + - For now the current I/O implementations remain in place unchanged, but they will be deprecated (and removed) in the not too distant future + # v00-15 * 2022-08-09 Thomas Madlener ([PR#312](https://github.com/AIDASoft/podio/pull/312)) From c60ac6f4fb6530732e4320c1536f846f0f1aee2a Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 6 Oct 2022 14:05:30 +0200 Subject: [PATCH 007/100] Updating version to v00-16 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fba80e7c3..175085b87 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) -SET( ${PROJECT_NAME}_VERSION_MINOR 15 ) +SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) SET( ${PROJECT_NAME}_VERSION_PATCH 0 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From 051a3fbb6bbb7fa41f65b3ed286857fe6f3a0359 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 21 Oct 2022 17:16:10 +0200 Subject: [PATCH 008/100] Update CI workflows and add EDM4hep workflow (#335) * Use latest versions of actions and introduce log groups * Update to LCG_102 in test lcg based environments * Add workflow that builds EDM4hep after podio --- .github/workflows/coverity.yml | 6 +-- .github/workflows/edm4hep.yaml | 62 +++++++++++++++++++++++++++++++ .github/workflows/key4hep.yml | 12 +++++- .github/workflows/pre-commit.yml | 12 ++++-- 
.github/workflows/sanitizers.yaml | 15 +++++--- .github/workflows/test.yml | 17 ++++++--- .github/workflows/ubuntu.yml | 15 ++++++-- python/podio_class_generator.py | 8 ++-- python/test_MemberParser.py | 1 + tools/podio-dump | 6 +-- 10 files changed, 125 insertions(+), 29 deletions(-) create mode 100644 .github/workflows/edm4hep.yaml diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index dbb6aa6ba..d7d5de93d 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -10,11 +10,11 @@ jobs: if: github.repository == 'AIDASoft/podio' steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: coverity-cmake-command: 'cmake -DCMAKE_CXX_STANDARD=17 -DENABLE_SIO=ON -DUSE_EXTERNAL_CATCH2=OFF ..' coverity-project: 'AIDASoft%2Fpodio' coverity-project-token: ${{ secrets.PODIO_COVERITY_TOKEN }} github-pat: ${{ secrets.READ_COVERITY_IMAGE }} - release-platform: "LCG_99/x86_64-centos7-gcc10-opt" + release-platform: "LCG_102/x86_64-centos7-gcc11-opt" diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml new file mode 100644 index 000000000..261c7b054 --- /dev/null +++ b/.github/workflows/edm4hep.yaml @@ -0,0 +1,62 @@ +name: edm4hep + +on: [push, pull_request] + +jobs: + build-and-test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + LCG: ["LCG_102/x86_64-centos7-gcc11-opt", + "LCG_102/x86_64-ubuntu2004-gcc9-opt"] + steps: + - uses: actions/checkout@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 + with: + release-platform: ${{ matrix.LCG }} + run: | + STARTDIR=$(pwd) + echo "::group::Build Catch2" + cd /home/runner/work/podio/ + git clone --branch v3.1.0 --depth 1 https://github.com/catchorg/Catch2 + cd Catch2 + mkdir build && cd build + cmake -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=../install -G 
Ninja .. + ninja -k0 install + cd .. + export CMAKE_PREFIX_PATH=$(pwd)/install:$CMAKE_PREFIX_PATH + echo "::endgroup::" + echo "::group::Build podio" + cd $STARTDIR + mkdir build && cd build + cmake -DENABLE_SIO=${{ matrix.sio }} \ + -DCMAKE_INSTALL_PREFIX=../install \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DUSE_EXTERNAL_CATCH2=ON \ + -G Ninja .. + ninja -k0 + echo "::endgroup::" + echo "::group::Test and install podio" + ctest --output-on-failure + ninja install + cd .. + export ROOT_INCLUDE_PATH=$(pwd)/install/include:$ROOT_INCLUDE_PATH:$CPATH + unset CPATH + export CMAKE_PREFIX_PATH=$(pwd)/install:$CMAKE_PREFIX_PATH + export LD_LIBRARY_PATH=$(pwd)/install/lib:$(pwd)/install/lib64:$LD_LIBRARY_PATH + echo "::endgroup::" + echo "::group::Build and test EDM4hep" + cd /home/runner/work/podio + git clone --depth 1 https://github.com/key4hep/EDM4hep + cd EDM4hep + mkdir build && cd build + cmake -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DUSE_EXTERNAL_CATCH2=ON \ + -G Ninja .. + ninja -k0 + ctest --output-on-failure + echo "::endgroup::" diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index 2f6f7e7dd..0b2479581 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -11,12 +11,13 @@ jobs: "sw-nightlies.hsf.org/key4hep"] steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v3 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: container: centos7 view-path: /cvmfs/${{ matrix.release }} run: | + echo "::group::Run CMake" mkdir build install cd build cmake -DENABLE_SIO=ON \ @@ -25,6 +26,13 @@ jobs: -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ -DUSE_EXTERNAL_CATCH2=ON \ -G Ninja .. 
+ echo "::endgroup::" + echo "::group::Build" ninja -k0 + echo "::endgroup::" + echo "::group::Run tests" ctest --output-on-failure + echo "::endgroup::" + echo "::group::Install" ninja install + echo "::endgroup::" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 3e104e1c5..3b867c880 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -7,15 +7,18 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v3 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: - release-platform: LCG_99/x86_64-centos7-clang10-opt + release-platform: LCG_102/x86_64-centos7-clang12-opt run: | + echo "::group::Setup pre-commit" export PYTHONPATH=$(python -m site --user-site):$PYTHONPATH export PATH=/root/.local/bin:$PATH pip install argparse --user pip install pre-commit --user + echo "::endgroup::" + echo "::group::Run CMake" mkdir build cd build cmake .. -DENABLE_SIO=ON \ @@ -25,6 +28,9 @@ jobs: -DUSE_EXTERNAL_CATCH2=OFF ln -s $(pwd)/compile_commands.json ../ cd .. + echo "::endgroup::" + echo "::group::Run pre-commit" pre-commit run --show-diff-on-failure \ --color=always \ --all-files + echo "::endgroup::" diff --git a/.github/workflows/sanitizers.yaml b/.github/workflows/sanitizers.yaml index ff156f5ac..65904546d 100644 --- a/.github/workflows/sanitizers.yaml +++ b/.github/workflows/sanitizers.yaml @@ -7,7 +7,7 @@ jobs: strategy: fail-fast: false matrix: - compiler: [gcc10, clang10] + compiler: [gcc11, clang12] # Since Leak is usually part of Address, we do not run it separately in # CI. 
Keeping Address and Undefined separate for easier debugging sanitizer: [Thread, @@ -23,12 +23,12 @@ jobs: # sanitizer: MemoryWithOrigin steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: - release-platform: LCG_99/x86_64-centos7-${{ matrix.compiler }}-opt + release-platform: LCG_102/x86_64-centos7-${{ matrix.compiler }}-opt run: | - set -x + echo "::group::Run CMake" mkdir build cd build cmake -DCMAKE_BUILD_TYPE=Debug \ @@ -38,5 +38,10 @@ jobs: -DUSE_EXTERNAL_CATCH2=OFF \ -DENABLE_SIO=ON \ -G Ninja .. + echo "::endgroup::" + echo "::group::Build" ninja -k0 + echo "::endgroup::" + echo "::group::Run tests" ctest --output-on-failure + echo "::endgroup::" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 21f7f6892..2e507d0bc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,19 +8,19 @@ jobs: fail-fast: false matrix: sio: [ON] - LCG: ["LCG_99/x86_64-centos7-gcc8-opt", - "LCG_99/x86_64-centos7-clang10-opt", - "LCG_99/x86_64-centos8-gcc10-opt", + LCG: ["LCG_102/x86_64-centos7-clang12-opt", + "LCG_102/x86_64-centos8-gcc11-opt", "dev3/x86_64-centos7-clang12-opt", "dev4/x86_64-centos7-gcc11-opt", "dev4/x86_64-centos7-clang12-opt"] steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: release-platform: ${{ matrix.LCG }} run: | + echo "::group::Run CMake" mkdir build install cd build cmake -DENABLE_SIO=${{ matrix.sio }} \ @@ -29,6 +29,13 @@ jobs: -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ -DUSE_EXTERNAL_CATCH2=OFF \ -G Ninja .. 
+ echo "::endgroup::" + echo "::group::Build" ninja -k0 + echo "::endgroup::" + echo "::group::Run tests" ctest --output-on-failure + echo "::endgroup::" + echo "::group::Install" ninja install + echo "::endgroup::" diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 4a2f96121..3d579dcb1 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -9,16 +9,16 @@ jobs: fail-fast: false matrix: sio: [ON] - LCG: ["LCG_99/x86_64-ubuntu2004-gcc9-opt", - "dev3/x86_64-ubuntu2004-gcc9-opt", + LCG: ["dev3/x86_64-ubuntu2004-gcc9-opt", "dev4/x86_64-ubuntu2004-gcc9-opt"] steps: - uses: actions/checkout@v2 - - uses: cvmfs-contrib/github-action-cvmfs@v2 - - uses: aidasoft/run-lcg-view@v2 + - uses: cvmfs-contrib/github-action-cvmfs@v3 + - uses: aidasoft/run-lcg-view@v4 with: release-platform: ${{ matrix.LCG }} run: | + echo "::group::Run CMake" mkdir build install cd build cmake -DENABLE_SIO=${{ matrix.sio }} \ @@ -27,6 +27,13 @@ jobs: -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ -DUSE_EXTERNAL_CATCH2=OFF \ -G Ninja ..
+ echo "::endgroup::" + echo "::group::Build" ninja -k0 + echo "::endgroup::" + echo "::group::Run tests" ctest --output-on-failure + echo "::endgroup::" + echo "::group::Install" ninja install + echo "::endgroup::" diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 71c1bd6d1..a4b587d4e 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -487,17 +487,17 @@ def read_upstream_edm(name_path): try: name, path = name_path.split(':') - except ValueError: + except ValueError as err: raise argparse.ArgumentTypeError('upstream-edm argument needs to be the upstream package ' - 'name and the upstream edm yaml file separated by a colon') + 'name and the upstream edm yaml file separated by a colon') from err if not os.path.isfile(path): raise argparse.ArgumentTypeError(f'{path} needs to be an EDM yaml file') try: return PodioConfigReader.read(path, name) - except DefinitionError: - raise argparse.ArgumentTypeError(f'{path} does not contain a valid datamodel definition') + except DefinitionError as err: + raise argparse.ArgumentTypeError(f'{path} does not contain a valid datamodel definition') from err if __name__ == "__main__": diff --git a/python/test_MemberParser.py b/python/test_MemberParser.py index 43f4a0c12..572c33cd0 100644 --- a/python/test_MemberParser.py +++ b/python/test_MemberParser.py @@ -216,6 +216,7 @@ def test_parse_invalid(self): try: self.assertRaises(DefinitionError, parser.parse, inp) except AssertionError: + # pylint: disable-next=raise-missing-from raise AssertionError(f"'{inp}' should raise a DefinitionError from the MemberParser") def test_parse_valid_no_description(self): diff --git a/tools/podio-dump b/tools/podio-dump index ac6c55f1c..ab4efbaf9 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -6,8 +6,8 @@ from EventStore import EventStore def dump_evt_overview(event, ievt): """Print an overview table of the event contents of the given event""" - 
print('{:#^82}'.format(f' Event
{ievt} ')) - print('{:<30} {:<40} {:<10}'.format('Name', 'Type', 'Size')) + print('{:#^82}'.format(f' Event {ievt} ')) # pylint: disable=consider-using-f-string + print(f'{"Name":<30} {"Type":<40} {"Size":<10}') print('-' * 82) for name in event.collections(): coll = event.get(name) @@ -24,7 +24,7 @@ def dump_overview(store, events): def dump_evt_detailed(event, ievt): """Dump this event in all its glory""" print() - print('{:#^82}'.format(f' Event {ievt} ')) + print('{:#^82}'.format(f' Event {ievt} ')) # pylint: disable=consider-using-f-string print() print('Parameters', flush=True) From e32f9a3d1e7210a867433f0ee849b47aea7abcef Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 21 Oct 2022 18:16:49 +0200 Subject: [PATCH 009/100] Make sure that EDM4hep is also built with SIO --- .github/workflows/edm4hep.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml index 261c7b054..e484144e4 100644 --- a/.github/workflows/edm4hep.yaml +++ b/.github/workflows/edm4hep.yaml @@ -31,7 +31,7 @@ jobs: echo "::group::Build podio" cd $STARTDIR mkdir build && cd build - cmake -DENABLE_SIO=${{ matrix.sio }} \ + cmake -DENABLE_SIO=ON \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ From 0a1b6f548cf6d30bd71ec01cf95442edcda46d7b Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Mon, 24 Oct 2022 15:54:10 +0200 Subject: [PATCH 010/100] Adding reading specific entry from frame (#340) --- include/podio/ROOTFrameReader.h | 14 ++++++++++++++ include/podio/SIOFrameReader.h | 7 +++++++ src/ROOTFrameReader.cc | 10 ++++++++++ src/SIOFrameReader.cc | 7 +++++++ tests/read_frame.h | 21 +++++++++++++++++++++ 5 files changed, 59 insertions(+) diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index b82f44a87..4eca496be 100644 --- a/include/podio/ROOTFrameReader.h +++ 
b/include/podio/ROOTFrameReader.h @@ -60,6 +60,13 @@ class ROOTFrameReader { */ std::unique_ptr readNextEntry(const std::string& name); + /** + * Read the specified data entry from which a Frame can be constructed for + * the given name. In case the entry does not exist for this name or in case + * there is no data for this name, this returns a nullptr. + */ + std::unique_ptr readEntry(const std::string& name, const unsigned entry); + /// Returns number of entries for the given name unsigned getEntries(const std::string& name) const; @@ -103,6 +110,13 @@ class ROOTFrameReader { GenericParameters readEventMetaData(CategoryInfo& catInfo); + /** + * Read the data entry specified in the passed CategoryInfo, and increase the + * counter afterwards. In case the requested entry is larger than the + * available number of entries, return a nullptr. + */ + std::unique_ptr readEntry(ROOTFrameReader::CategoryInfo& catInfo); + /** * Get / read the buffers at index iColl in the passed category information */ diff --git a/include/podio/SIOFrameReader.h b/include/podio/SIOFrameReader.h index 241289ee7..11b1676b6 100644 --- a/include/podio/SIOFrameReader.h +++ b/include/podio/SIOFrameReader.h @@ -32,6 +32,13 @@ class SIOFrameReader { */ std::unique_ptr readNextEntry(const std::string& name); + /** + * Read the specified data entry from which a Frame can be constructed for + * the given name. In case the entry does not exist for this name or in + * case there is no data for this name, this returns a nullptr.
+ */ + std::unique_ptr readEntry(const std::string& name, const unsigned entry); + /// Returns number of entries for the given name unsigned getEntries(const std::string& name) const; diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index 30cbda383..ce761bd4d 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -34,6 +34,16 @@ GenericParameters ROOTFrameReader::readEventMetaData(ROOTFrameReader::CategoryIn std::unique_ptr ROOTFrameReader::readNextEntry(const std::string& name) { auto& catInfo = getCategoryInfo(name); + return readEntry(catInfo); +} + +std::unique_ptr ROOTFrameReader::readEntry(const std::string& name, const unsigned entNum) { + auto& catInfo = getCategoryInfo(name); + catInfo.entry = entNum; + return readEntry(catInfo); +} + +std::unique_ptr ROOTFrameReader::readEntry(ROOTFrameReader::CategoryInfo& catInfo) { if (!catInfo.chain) { return nullptr; } diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc index 5b82f216d..0ad6d281e 100644 --- a/src/SIOFrameReader.cc +++ b/src/SIOFrameReader.cc @@ -66,6 +66,13 @@ std::unique_ptr SIOFrameReader::readNextEntry(const std::string& n tableInfo._uncompressed_length); } +std::unique_ptr SIOFrameReader::readEntry(const std::string& name, const unsigned entry) { + // NOTE: Will create or overwrite the entry counter + // All checks are done in the following function + m_nameCtr[name] = entry; + return readNextEntry(name); +} + unsigned SIOFrameReader::getEntries(const std::string& name) const { return m_tocRecord.getNRecords(name); } diff --git a/tests/read_frame.h b/tests/read_frame.h index e9951c3b8..166bd3456 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -52,6 +52,27 @@ int read_frames(const std::string& filename) { return 1; } + // Reading specific (jumping to) entry + { + auto frame = podio::Frame(reader.readEntry("events", 4)); + processEvent(frame, 4, reader.currentFileVersion()); + // Reading the next entry after jump, continues from after the jump + auto 
nextFrame = podio::Frame(reader.readNextEntry("events")); + processEvent(nextFrame, 5, reader.currentFileVersion()); + + auto otherFrame = podio::Frame(reader.readEntry("other_events", 4)); + processEvent(otherFrame, 4 + 100, reader.currentFileVersion()); + // Jumping back also works + auto previousFrame = podio::Frame(reader.readEntry("other_events", 2)); + processEvent(previousFrame, 2 + 100, reader.currentFileVersion()); + + // Trying to read a Frame that is not present returns a nullptr + if (reader.readEntry("events", 10)) { + std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl; + return 1; + } + } + return 0; } From 381f97d698c2e3e2ee0828cbab0b8cc9dae8bc23 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 2 Nov 2022 11:30:59 +0100 Subject: [PATCH 011/100] Update github checkout action to latest version (#342) --- .github/workflows/coverity.yml | 2 +- .github/workflows/edm4hep.yaml | 32 +++++++++++++++++-------------- .github/workflows/key4hep.yml | 2 +- .github/workflows/pre-commit.yml | 2 +- .github/workflows/sanitizers.yaml | 2 +- .github/workflows/test.yml | 2 +- .github/workflows/ubuntu.yml | 2 +- 7 files changed, 24 insertions(+), 20 deletions(-) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index d7d5de93d..2a13c2380 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest if: github.repository == 'AIDASoft/podio' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml index e484144e4..06149dc90 100644 --- a/.github/workflows/edm4hep.yaml +++ b/.github/workflows/edm4hep.yaml @@ -11,7 +11,17 @@ jobs: LCG: ["LCG_102/x86_64-centos7-gcc11-opt", "LCG_102/x86_64-ubuntu2004-gcc9-opt"] steps: - - uses: actions/checkout@v2 + - uses: 
actions/checkout@v3 + with: + path: podio + - uses: actions/checkout@v3 + with: + repository: key4hep/EDM4hep + path: edm4hep + - uses: actions/checkout@v3 + with: + repository: catchorg/Catch2 + path: catch2 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: @@ -19,17 +29,14 @@ jobs: run: | STARTDIR=$(pwd) echo "::group::Build Catch2" - cd /home/runner/work/podio/ - git clone --branch v3.1.0 --depth 1 https://github.com/catchorg/Catch2 - cd Catch2 + cd $STARTDIR/catch2 mkdir build && cd build cmake -DCMAKE_CXX_STANDARD=17 -DCMAKE_INSTALL_PREFIX=../install -G Ninja .. ninja -k0 install - cd .. - export CMAKE_PREFIX_PATH=$(pwd)/install:$CMAKE_PREFIX_PATH + export CMAKE_PREFIX_PATH=$STARTDIR/catch2/install:$CMAKE_PREFIX_PATH echo "::endgroup::" echo "::group::Build podio" - cd $STARTDIR + cd $STARTDIR/podio mkdir build && cd build cmake -DENABLE_SIO=ON \ -DCMAKE_INSTALL_PREFIX=../install \ @@ -42,16 +49,13 @@ jobs: echo "::group::Test and install podio" ctest --output-on-failure ninja install - cd .. 
- export ROOT_INCLUDE_PATH=$(pwd)/install/include:$ROOT_INCLUDE_PATH:$CPATH + export ROOT_INCLUDE_PATH=$STARTDIR/podio/install/include:$ROOT_INCLUDE_PATH:$CPATH unset CPATH - export CMAKE_PREFIX_PATH=$(pwd)/install:$CMAKE_PREFIX_PATH - export LD_LIBRARY_PATH=$(pwd)/install/lib:$(pwd)/install/lib64:$LD_LIBRARY_PATH + export CMAKE_PREFIX_PATH=$STARTDIR/podio/install:$CMAKE_PREFIX_PATH + export LD_LIBRARY_PATH=$STARTDIR/podio/install/lib:$STARTDIR/podio/install/lib64:$LD_LIBRARY_PATH echo "::endgroup::" echo "::group::Build and test EDM4hep" - cd /home/runner/work/podio - git clone --depth 1 https://github.com/key4hep/EDM4hep - cd EDM4hep + cd $STARTDIR/edm4hep mkdir build && cd build cmake -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index 0b2479581..dec780636 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -10,7 +10,7 @@ jobs: release: ["sw.hsf.org/key4hep", "sw-nightlies.hsf.org/key4hep"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 3b867c880..5f8eaf5fe 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -6,7 +6,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: diff --git a/.github/workflows/sanitizers.yaml b/.github/workflows/sanitizers.yaml index 65904546d..e15f2eade 100644 --- a/.github/workflows/sanitizers.yaml +++ b/.github/workflows/sanitizers.yaml @@ -22,7 +22,7 @@ jobs: # - compiler: clang10 # sanitizer: MemoryWithOrigin steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: 
aidasoft/run-lcg-view@v4 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2e507d0bc..21ab4d56c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,7 @@ jobs: "dev4/x86_64-centos7-gcc11-opt", "dev4/x86_64-centos7-clang12-opt"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 3d579dcb1..654b14a37 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -12,7 +12,7 @@ jobs: LCG: ["dev3/x86_64-ubuntu2004-gcc9-opt", "dev4/x86_64-ubuntu2004-gcc9-opt"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: From 785639278647f46ffb2850832b609e2a1ca6058f Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Wed, 2 Nov 2022 09:40:24 -0400 Subject: [PATCH 012/100] datamodel_syntax.md: fix a typo (#327) --- doc/datamodel_syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/datamodel_syntax.md b/doc/datamodel_syntax.md index e1bcf49d4..e665860e6 100644 --- a/doc/datamodel_syntax.md +++ b/doc/datamodel_syntax.md @@ -149,7 +149,7 @@ It is possible to extend another datamodel with your own types, resp. use some d This can be useful for prototyping new datatypes or for accomodating special requirements without having to reimplement / copy a complete datamodel. To pass an upstream datamodel to the class generator use the `--upstream-edm` option that takes the package name as well as the yaml definition file of the upstream datamodel separated by a colon (':'). -This will effectively make all components and datatpes of the upstream datamodel available to the current definition for validation and generation of the necessary includes. 
+This will effectively make all components and datatypes of the upstream datamodel available to the current definition for validation and generation of the necessary includes. Nevertheless, only the code for the datatypes and components defined in the current yaml file will be generated. The podio `PODIO_GENERATE_DATAMODEL` cmake macro has gained an additional parameter `UPSTREAM_EDM` to pass the arguments to the generator via the cmake macros. From 77babd893d203c79dfa3e7aa37e79c4ae438a8c8 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 10 Nov 2022 09:18:33 +0100 Subject: [PATCH 013/100] Fix missing setting of relations in nested get calls (#349) --- include/podio/Frame.h | 2 +- tests/read_test.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 557eddceb..fc31b4cbe 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -352,7 +352,7 @@ bool Frame::FrameModel::get(int collectionID, CollectionBase*& colle const auto& name = m_idTable.name(collectionID); const auto& [_, inserted] = m_retrievedIDs.insert(collectionID); - if (!inserted) { + if (inserted) { auto coll = doGet(name); if (coll) { collection = coll; diff --git a/tests/read_test.h b/tests/read_test.h index 0e54f0adf..9cc677aaf 100644 --- a/tests/read_test.h +++ b/tests/read_test.h @@ -136,6 +136,32 @@ void processEvent(StoreT& store, int eventNum, podio::version::Version fileVersi throw std::runtime_error("Collection 'clusters' should be present"); } + if (fileVersion >= podio::version::Version{0, 13, 2}) { + // Read the mcParticleRefs before reading any of the other collections that + // are referenced to make sure that all the necessary relations are handled + // correctly + auto& mcpRefs = store.template get("mcParticleRefs"); + if (!mcpRefs.isValid()) { + throw std::runtime_error("Collection 'mcParticleRefs' should be present"); + } + + // Only doing a very basic check here, that mainly 
just ensures that the + // RelationRange is valid and does not segfault. + for (auto ref : mcpRefs) { + const auto daughters = ref.daughters(); + if (!daughters.empty()) { + // This will segfault in case things are not working + auto d [[maybe_unused]] = daughters[0]; + } + + const auto parents = ref.parents(); + if (!parents.empty()) { + // This will segfault in case things are not working + auto d [[maybe_unused]] = parents[0]; + } + } + } + auto& mcps = store.template get("mcparticles"); if (!mcps.isValid()) { throw std::runtime_error("Collection 'mcparticles' should be present"); From e8ae38b65ea81dc81d2fc3aebcf6c1679436a2e5 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 10 Nov 2022 17:34:18 +0100 Subject: [PATCH 014/100] Python bindings for Frame (#343) * Make python code a proper module * Reorganize CMake config for shared libraries * Refactor library and dict generation in cmake config * Add basic version of python bindings for working with Frames --- .gitignore | 1 + CMakeLists.txt | 2 + include/podio/Frame.h | 25 +++ include/podio/ROOTFrameReader.h | 5 + include/podio/SIOBlock.h | 5 + include/podio/SIOFrameReader.h | 5 + python/CMakeLists.txt | 29 ++- python/EventStore.py | 139 +------------ python/__init__.py.in | 1 + python/podio/EventStore.py | 137 +++++++++++++ python/podio/base_reader.py | 44 ++++ python/podio/frame.py | 178 ++++++++++++++++ python/podio/frame_iterator.py | 58 ++++++ python/{ => podio}/generator_utils.py | 0 python/{ => podio}/podio_config_reader.py | 2 +- python/podio/root_io.py | 28 +++ python/podio/sio_io.py | 25 +++ .../test_ClassDefinitionValidator.py | 4 +- python/{ => podio}/test_EventStore.py | 2 +- python/{ => podio}/test_EventStoreRoot.py | 4 +- python/{ => podio}/test_EventStoreSio.py | 8 +- python/podio/test_Frame.py | 95 +++++++++ python/{ => podio}/test_MemberParser.py | 2 +- python/podio/test_Reader.py | 70 +++++++ python/podio/test_ReaderRoot.py | 14 ++ python/podio/test_ReaderSio.py | 16 ++ 
python/podio/test_utils.py | 6 + python/podio_class_generator.py | 4 +- src/CMakeLists.txt | 194 ++++++++++-------- src/ROOTFrameReader.cc | 11 +- src/SIOBlock.cc | 10 + src/SIOFrameReader.cc | 4 + src/root_selection.xml | 6 + src/sio_selection.xml | 6 + tests/CMakeLists.txt | 9 +- tests/read.py | 2 +- tests/write_frame.h | 1 + tools/podio-dump | 2 +- 38 files changed, 909 insertions(+), 245 deletions(-) create mode 100644 python/__init__.py.in create mode 100644 python/podio/EventStore.py create mode 100644 python/podio/base_reader.py create mode 100644 python/podio/frame.py create mode 100644 python/podio/frame_iterator.py rename python/{ => podio}/generator_utils.py (100%) rename python/{ => podio}/podio_config_reader.py (99%) create mode 100644 python/podio/root_io.py create mode 100644 python/podio/sio_io.py rename python/{ => podio}/test_ClassDefinitionValidator.py (99%) rename python/{ => podio}/test_EventStore.py (98%) rename python/{ => podio}/test_EventStoreRoot.py (94%) rename python/{ => podio}/test_EventStoreSio.py (86%) create mode 100644 python/podio/test_Frame.py rename python/{ => podio}/test_MemberParser.py (99%) create mode 100644 python/podio/test_Reader.py create mode 100644 python/podio/test_ReaderRoot.py create mode 100644 python/podio/test_ReaderSio.py create mode 100644 python/podio/test_utils.py create mode 100644 src/root_selection.xml create mode 100644 src/sio_selection.xml diff --git a/.gitignore b/.gitignore index be893c085..568832339 100644 --- a/.gitignore +++ b/.gitignore @@ -61,3 +61,4 @@ spack* # Populated by cmake before build /include/podio/podioVersion.h +/python/podio/__init__.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 175085b87..c87330a8c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,6 +146,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/podioVersion.in.h ${CMAKE_CURRENT_SOURCE_DIR}/include/podio/podioVersion.h ) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/include/podio/podioVersion.h DESTINATION 
${CMAKE_INSTALL_INCLUDEDIR}/podio ) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/python/__init__.py.in + ${CMAKE_CURRENT_SOURCE_DIR}/python/podio/__init__.py) #--- add license files --------------------------------------------------------- install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE diff --git a/include/podio/Frame.h b/include/podio/Frame.h index fc31b4cbe..5f2357dae 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -27,6 +27,10 @@ using EnableIfCollection = typename std::enable_if_t>; template using EnableIfCollectionRValue = typename std::enable_if_t && !std::is_lvalue_reference_v>; +/// Alias template for enabling overloads for r-values +template +using EnableIfRValue = typename std::enable_if_t>; + namespace detail { /** The minimal interface for raw data types */ @@ -152,6 +156,14 @@ class Frame { template Frame(std::unique_ptr); + /** Frame constructor from (almost) arbitrary raw data. + * + * This r-value overload is mainly present for enabling the python bindings, + * where cppyy seems to strip the std::unique_ptr somewhere in the process + */ + template > + Frame(FrameDataT&&); + // The frame is a non-copyable type Frame(const Frame&) = delete; Frame& operator=(const Frame&) = delete; @@ -167,6 +179,11 @@ class Frame { template > const CollT& get(const std::string& name) const; + /** Get a collection from the Frame. This is the pointer-to-base version for + * type-erased access (e.g. 
python interface) + */ + const podio::CollectionBase* get(const std::string& name) const; + /** (Destructively) move a collection into the Frame and get a const reference * back for further use */ @@ -259,6 +276,10 @@ template Frame::Frame(std::unique_ptr data) : m_self(std::make_unique>(std::move(data))) { } +template +Frame::Frame(FrameDataT&& data) : Frame(std::make_unique(std::move(data))) { +} + template const CollT& Frame::get(const std::string& name) const { const auto* coll = dynamic_cast(m_self->get(name)); @@ -270,6 +291,10 @@ const CollT& Frame::get(const std::string& name) const { return emptyColl; } +const podio::CollectionBase* Frame::get(const std::string& name) const { + return m_self->get(name); +} + void Frame::put(std::unique_ptr coll, const std::string& name) { const auto* retColl = m_self->put(std::move(coll), name); if (!retColl) { diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index 4eca496be..1850a0b02 100644 --- a/include/podio/ROOTFrameReader.h +++ b/include/podio/ROOTFrameReader.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -70,10 +71,14 @@ class ROOTFrameReader { /// Returns number of entries for the given name unsigned getEntries(const std::string& name) const; + /// Get the build version of podio that has been used to write the current file podio::version::Version currentFileVersion() const { return m_fileVersion; } + /// Get the names of all the available Frame categories in the current file(s) + std::vector getAvailableCategories() const; + private: /** * Helper struct to group together all the necessary state to read / process a diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index fdc4bd8a8..7c9b2a462 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace podio { @@ -230,6 +231,10 @@ class SIOFileTOCRecord { */ PositionType getPosition(const std::string& name,
unsigned iEntry = 0) const; + /** Get all the record names that are stored in this TOC record + */ + std::vector getRecordNames() const; + private: friend struct SIOFileTOCRecordBlock; diff --git a/include/podio/SIOFrameReader.h b/include/podio/SIOFrameReader.h index 11b1676b6..d7a2c5e8c 100644 --- a/include/podio/SIOFrameReader.h +++ b/include/podio/SIOFrameReader.h @@ -9,6 +9,7 @@ #include #include +#include #include namespace podio { @@ -44,10 +45,14 @@ class SIOFrameReader { void openFile(const std::string& filename); + /// Get the build version of podio that has been used to write the current file podio::version::Version currentFileVersion() const { return m_fileVersion; } + /// Get the names of all the available Frame categories in the current file(s) + std::vector getAvailableCategories() const; + private: void readPodioHeader(); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fb6fc46b7..54547e833 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -2,19 +2,28 @@ SET(podio_PYTHON_INSTALLDIR python) SET(podio_PYTHON_INSTALLDIR ${podio_PYTHON_INSTALLDIR} PARENT_SCOPE) SET(podio_PYTHON_DIR ${CMAKE_CURRENT_LIST_DIR} PARENT_SCOPE) -file(GLOB to_install *.py figure.txt) - -# remove test_*.py file from being installed -foreach(file_path ${to_install}) - get_filename_component(file_name ${file_path} NAME) - string(REGEX MATCH test_.*\\.py$ FOUND_PY_TEST ${file_name}) - if (NOT "${FOUND_PY_TEST}" STREQUAL "") - list(REMOVE_ITEM to_install "${file_path}") - endif() -endforeach() +set(to_install + podio_class_generator.py + figure.txt + EventStore.py) install(FILES ${to_install} DESTINATION ${podio_PYTHON_INSTALLDIR}) +if(ENABLE_SIO) + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/podio + DESTINATION ${podio_PYTHON_INSTALLDIR} + REGEX test_.*\\.py$ EXCLUDE # Do not install test files + PATTERN __pycache__ EXCLUDE # Or pythons caches + ) +else() + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/podio + DESTINATION
${podio_PYTHON_INSTALLDIR} + REGEX test_.*\\.py$ EXCLUDE # Do not install test files + PATTERN __pycache__ EXCLUDE # Or pythons caches + REGEX .*sio.*\\.py$ EXCLUDE # All things sio related + ) +endif() + #--- install templates --------------------------------------------------------- install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/templates DESTINATION ${podio_PYTHON_INSTALLDIR}) diff --git a/python/EventStore.py b/python/EventStore.py index c1a2df3d4..5607f2da0 100644 --- a/python/EventStore.py +++ b/python/EventStore.py @@ -1,137 +1,6 @@ -"""Python EventStore for reading files with podio generated datamodels""" +"""Legacy import wrapper for EventStore.""" +import warnings +warnings.warn("You are using the legacy EventStore import. Switch to 'from podio import EventStore'", FutureWarning) -from ROOT import gSystem -gSystem.Load("libpodioPythonStore") # noqa: E402 -from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position - - -def size(self): - """Override size function that can be attached as __len__ method to - collections""" - return self.size() - - -def getitem(self, key): - """Override getitem function that can be attached as __getitem__ method to - collections (see below why this is necessary sometimes)""" - return self.at(key) - - -class EventStore: - '''Interface to events in an podio root file. - Example of use: - events = EventStore(["example.root", "example1.root"]) - for iev, store in islice(enumerate(events), 0, 2): - particles = store.get("GenParticle"); - for i, p in islice(enumerate(particles), 0, 5): - print "particle ", i, p.ID(), p.P4().Pt - ''' - - def __init__(self, filenames): - '''Create an event list from the podio root file. - Parameters: - filenames: list of root files - you can of course provide a list containing a single - root file. you could use the glob module to get all - files matching a wildcard pattern. 
- ''' - if isinstance(filenames, str): - filenames = (filenames,) - self.files = filenames - self.stores = [] - self.current_store = None - for fname in self.files: - store = podio.PythonEventStore(fname) - if store.isZombie(): - raise ValueError(fname + ' does not exist.') - store.name = fname - if self.current_store is None: - self.current_store = store - self.stores.append((store.getEntries(), store)) - - def __str__(self): - result = "Content:" - for item in self.current_store.getCollectionNames(): - result += f"\n\t{item}" - return result - - def get(self, name): - '''Returns a collection. - Parameters: - name: name of the collection in the podio root file. - ''' - coll = self.current_store.get(name) - # adding length function - coll.__len__ = size - # enabling the use of [] notation on the collection - # cppyy defines the __getitem__ method if the underlying c++ class has an operator[] - # method. For some reason they do not conform to the usual signature and only - # pass one argument to the function they call. Here we simply check if we have to - # define the __getitem__ for the collection. 
- if not hasattr(coll, '__getitem__'): - coll.__getitem__ = getitem - return coll - - def collections(self): - """Return list of all collection names.""" - return [str(c) for c in self.current_store.getCollectionNames()] - - def metadata(self): - """Get the metadata of the current event as GenericParameters""" - return self.current_store.getEventMetaData() - - def isValid(self): - """Check if the EventStore is in a valid state""" - return self.current_store is not None and self.current_store.isValid() - - # def __getattr__(self, name): - # '''missing attributes are taken from self.current_store''' - # if name != 'current_store': - # return getattr(self.current_store, name) - # else: - # return None - - def current_filename(self): - '''Returns the name of the current file.''' - if self.current_store is None: - return None - return self.current_store.fname - - def __enter__(self): - return self - - def __exit__(self, exception_type, exception_val, trace): - for store in self.stores: - store[1].close() - - def __iter__(self): - '''iterate on events in the tree. 
- ''' - for _, store in self.stores: - self.current_store = store - for _ in range(store.getEntries()): - yield store - store.endOfEvent() - - def __getitem__(self, evnum): - '''Get event number evnum''' - current_store = None - rel_evnum = evnum - for nev, store in self.stores: - if rel_evnum < nev: - current_store = store - break - rel_evnum -= nev - if current_store is None: - raise ValueError('event number too large: ' + str(evnum)) - self.current_store = current_store - self.current_store.goToEvent(rel_evnum) - return self - - def __len__(self): - '''Returns the total number of events in all files.''' - nevts_all_files = 0 - for nev, _ in self.stores: - nevts_all_files += nev - return nevts_all_files +from podio import EventStore # noqa: F401 # pylint: disable=wrong-import-position, unused-import diff --git a/python/__init__.py.in b/python/__init__.py.in new file mode 100644 index 000000000..e1f12ec32 --- /dev/null +++ b/python/__init__.py.in @@ -0,0 +1 @@ +__version__ = '@podio_VERSION@' diff --git a/python/podio/EventStore.py b/python/podio/EventStore.py new file mode 100644 index 000000000..c1a2df3d4 --- /dev/null +++ b/python/podio/EventStore.py @@ -0,0 +1,137 @@ +"""Python EventStore for reading files with podio generated datamodels""" + + +from ROOT import gSystem +gSystem.Load("libpodioPythonStore") # noqa: E402 +from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position + + +def size(self): + """Override size function that can be attached as __len__ method to + collections""" + return self.size() + + +def getitem(self, key): + """Override getitem function that can be attached as __getitem__ method to + collections (see below why this is necessary sometimes)""" + return self.at(key) + + +class EventStore: + '''Interface to events in an podio root file. 
+ Example of use: + events = EventStore(["example.root", "example1.root"]) + for iev, store in islice(enumerate(events), 0, 2): + particles = store.get("GenParticle"); + for i, p in islice(enumerate(particles), 0, 5): + print "particle ", i, p.ID(), p.P4().Pt + ''' + + def __init__(self, filenames): + '''Create an event list from the podio root file. + Parameters: + filenames: list of root files + you can of course provide a list containing a single + root file. you could use the glob module to get all + files matching a wildcard pattern. + ''' + if isinstance(filenames, str): + filenames = (filenames,) + self.files = filenames + self.stores = [] + self.current_store = None + for fname in self.files: + store = podio.PythonEventStore(fname) + if store.isZombie(): + raise ValueError(fname + ' does not exist.') + store.name = fname + if self.current_store is None: + self.current_store = store + self.stores.append((store.getEntries(), store)) + + def __str__(self): + result = "Content:" + for item in self.current_store.getCollectionNames(): + result += f"\n\t{item}" + return result + + def get(self, name): + '''Returns a collection. + Parameters: + name: name of the collection in the podio root file. + ''' + coll = self.current_store.get(name) + # adding length function + coll.__len__ = size + # enabling the use of [] notation on the collection + # cppyy defines the __getitem__ method if the underlying c++ class has an operator[] + # method. For some reason they do not conform to the usual signature and only + # pass one argument to the function they call. Here we simply check if we have to + # define the __getitem__ for the collection. 
+ if not hasattr(coll, '__getitem__'): + coll.__getitem__ = getitem + return coll + + def collections(self): + """Return list of all collection names.""" + return [str(c) for c in self.current_store.getCollectionNames()] + + def metadata(self): + """Get the metadata of the current event as GenericParameters""" + return self.current_store.getEventMetaData() + + def isValid(self): + """Check if the EventStore is in a valid state""" + return self.current_store is not None and self.current_store.isValid() + + # def __getattr__(self, name): + # '''missing attributes are taken from self.current_store''' + # if name != 'current_store': + # return getattr(self.current_store, name) + # else: + # return None + + def current_filename(self): + '''Returns the name of the current file.''' + if self.current_store is None: + return None + return self.current_store.fname + + def __enter__(self): + return self + + def __exit__(self, exception_type, exception_val, trace): + for store in self.stores: + store[1].close() + + def __iter__(self): + '''iterate on events in the tree. 
+ ''' + for _, store in self.stores: + self.current_store = store + for _ in range(store.getEntries()): + yield store + store.endOfEvent() + + def __getitem__(self, evnum): + '''Get event number evnum''' + current_store = None + rel_evnum = evnum + for nev, store in self.stores: + if rel_evnum < nev: + current_store = store + break + rel_evnum -= nev + if current_store is None: + raise ValueError('event number too large: ' + str(evnum)) + self.current_store = current_store + self.current_store.goToEvent(rel_evnum) + return self + + def __len__(self): + '''Returns the total number of events in all files.''' + nevts_all_files = 0 + for nev, _ in self.stores: + nevts_all_files += nev + return nevts_all_files diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py new file mode 100644 index 000000000..78549a5a4 --- /dev/null +++ b/python/podio/base_reader.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +"""Python module for defining the basic reader interface that is used by the +backend specific bindings""" + + +from podio.frame_iterator import FrameCategoryIterator + + +class BaseReaderMixin: + """Mixin class the defines the base interface of the readers. + + The backend specific readers inherit from here and have to initialize the + following members: + - _reader: The actual reader that is able to read frames + """ + + def __init__(self): + """Initialize common members. + + In inheriting classes this needs to be called **after** the _reader has been + setup. + """ + self._categories = tuple(s.data() for s in self._reader.getAvailableCategories()) + + @property + def categories(self): + """Get the available categories from this reader. + + Returns: + tuple(str): The names of the available categories from this reader + """ + return self._categories + + def get(self, category): + """Get an iterator with access functionality for a given category. 
+ + Args: + category (str): The name of the desired category + + Returns: + FrameCategoryIterator: The iterator granting access to all Frames of the + desired category + """ + return FrameCategoryIterator(self._reader, category) diff --git a/python/podio/frame.py b/python/podio/frame.py new file mode 100644 index 000000000..903ccf41d --- /dev/null +++ b/python/podio/frame.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +"""Module for the python bindings of the podio::Frame""" + +# pylint: disable-next=import-error # gbl is a dynamic module from cppyy +from cppyy.gbl import std + +import ROOT +# NOTE: It is necessary that this can be found on the ROOT_INCLUDE_PATH +ROOT.gInterpreter.LoadFile('podio/Frame.h') # noqa: E402 +from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position + + +def _determine_supported_parameter_types(lang): + """Determine the supported types for the parameters. + + Args: + lang (str): Language for which the type names should be returned. Either + 'c++' or 'py'. 
+ + Returns: + tuple (str): the tuple with the string representation of all **c++** + classes that are supported + """ + types_tuple = podio.SupportedGenericDataTypes() + n_types = std.tuple_size[podio.SupportedGenericDataTypes].value + + # Get the python types with the help of cppyy and the STL + py_types = (type(std.get[i](types_tuple)).__name__ for i in range(n_types)) + if lang == 'py': + return tuple(py_types) + if lang == 'c++': + # Map of types that need special care when going from python to c++ + py_to_cpp_type_map = { + 'str': 'std::string' + } + # Convert them to the corresponding c++ types + return tuple(py_to_cpp_type_map.get(t, t) for t in py_types) + + raise ValueError(f"lang needs to be 'py' or 'c++' (got {lang})") + + +SUPPORTED_PARAMETER_TYPES = _determine_supported_parameter_types('c++') +SUPPORTED_PARAMETER_PY_TYPES = _determine_supported_parameter_types('py') + + +class Frame: + """Frame class that serves as a container of collection and meta data.""" + + # Map that is necessary for easier disambiguation of parameters that are + # available with more than one type under the same name. Maps a python type to + # a c++ vector of the corresponding type + _py_to_cpp_type_map = { + pytype: f'std::vector<{cpptype}>' for (pytype, cpptype) in zip(SUPPORTED_PARAMETER_PY_TYPES, + SUPPORTED_PARAMETER_TYPES) + } + + def __init__(self, data=None): + """Create a Frame. + + Args: + data (FrameData, optional): Almost arbitrary FrameData, e.g. from file + """ + # Explicitly check for None here, to not return empty Frames on nullptr data + if data is not None: + self._frame = podio.Frame(data) + else: + self._frame = podio.Frame() + + self._collections = tuple(str(s) for s in self._frame.getAvailableCollections()) + self._param_key_types = self._init_param_keys() + + @property + def collections(self): + """Get the available collection (names) from this Frame. + + Returns: + tuple(str): The names of the available collections from this Frame. 
+ """ + return self._collections + + def get(self, name): + """Get a collection from the Frame by name. + + Args: + name (str): The name of the desired collection + + Returns: + collection (podio.CollectionBase): The collection stored in the Frame + + Raises: + KeyError: If the collection with the name is not available + """ + collection = self._frame.get(name) + if not collection: + raise KeyError + return collection + + @property + def parameters(self): + """Get the available parameter names from this Frame. + + Returns: + tuple (str): The names of the available parameters from this Frame. + """ + return tuple(self._param_key_types.keys()) + + def get_parameter(self, name, as_type=None): + """Get the parameter stored under the given name. + + Args: + name (str): The name of the parameter + as_type (str, optional): Type specifier to disambiguate between + parameters with the same name but different types. If there is only + one parameter with a given name, this argument is ignored + + Returns: + int, float, str or list of those: The value of the stored parameter + + Raises: + KeyError: If no parameter is stored under the given name + ValueError: If there are multiple parameters with the same name, but + multiple types and no type specifier to disambiguate between them + has been passed. 
+ + """ + def _get_param_value(par_type, name): + par_value = self._frame.getParameter[par_type](name) + if len(par_value) > 1: + return list(par_value) + return par_value[0] + + # This access already raises the KeyError if there is no such parameter + par_type = self._param_key_types[name] + # Exactly one parameter, nothing more to do here + if len(par_type) == 1: + return _get_param_value(par_type[0], name) + + if as_type is None: + raise ValueError(f'{name} parameter has {len(par_type)} different types available, ' + 'but no as_type argument to disambiguate') + + req_type = self._py_to_cpp_type_map.get(as_type, None) + if req_type is None: + raise ValueError(f'as_type value {as_type} cannot be mapped to a valid parameter type') + + if req_type not in par_type: + raise ValueError(f'{name} parameter is not available as type {as_type}') + + return _get_param_value(req_type, name) + + def _init_param_keys(self): + """Initialize the param keys dict for easier lookup of the available parameters. + + NOTE: This depends on a "side channel" that is usually reserved for the + writers but is currently still in the public interface of the Frame + + Returns: + dict: A dictionary mapping each key to the corresponding c++ type + """ + params = self._frame.getGenericParametersForWrite() # this is the iffy bit + keys_dict = {} + for par_type in SUPPORTED_PARAMETER_TYPES: + keys = params.getKeys[par_type]() + for key in keys: + # Make sure to convert to a python string here to not have a dangling + # reference here for the key. + key = str(key) + # In order to support the use case of having the same key for multiple + # types create a list of available types for the key, so that we can + # disambiguate later. 
Storing a vector here, and check later how + # many elements there actually are to decide whether to return a single + # value or a list + if key not in keys_dict: + keys_dict[key] = [f'std::vector<{par_type}>'] + else: + keys_dict[key].append(f'std::vector<{par_type}>') + + return keys_dict diff --git a/python/podio/frame_iterator.py b/python/podio/frame_iterator.py new file mode 100644 index 000000000..b3d9925ad --- /dev/null +++ b/python/podio/frame_iterator.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +"""Module defining the Frame iterator used by the Reader interface""" + +# pylint: disable-next=import-error # gbl is a dynamic module from cppyy +from cppyy.gbl import std +from podio.frame import Frame + + +class FrameCategoryIterator: + """Iterator for iterating over all Frames of a given category available from a + reader as well as accessing specific entries + """ + + def __init__(self, reader, category): + """Construct the iterator from the reader and the cateogry. + + Args: + reader (Reader): Any podio reader offering access to Frames + category (str): The category name of the Frames to be iterated over + """ + self._reader = reader + self._category = category + + def __iter__(self): + """The trivial implementaion for the iterator protocol.""" + return self + + def __next__(self): + """Get the next available Frame or stop.""" + frame_data = self._reader.readNextEntry(self._category) + if frame_data: + return Frame(std.move(frame_data)) + + raise StopIteration + + def __len__(self): + """Get the number of available Frames for the passed category.""" + return self._reader.getEntries(self._category) + + def __getitem__(self, entry): + """Get a specific entry. 
+ + Args: + entry (int): The entry to access + """ + # Handle python negative indexing to start from the end + if entry < 0: + entry = self._reader.getEntries(self._category) + entry + + if entry < 0: + # If we are below 0 now, we do not have enough entries to serve the request + raise IndexError + + frame_data = self._reader.readEntry(self._category, entry) + if frame_data: + return Frame(std.move(frame_data)) + + raise IndexError diff --git a/python/generator_utils.py b/python/podio/generator_utils.py similarity index 100% rename from python/generator_utils.py rename to python/podio/generator_utils.py diff --git a/python/podio_config_reader.py b/python/podio/podio_config_reader.py similarity index 99% rename from python/podio_config_reader.py rename to python/podio/podio_config_reader.py index 8f81b45a2..14c34d0f5 100644 --- a/python/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -6,7 +6,7 @@ import warnings import yaml -from generator_utils import MemberVariable, DefinitionError, BUILTIN_TYPES, DataModel +from podio.generator_utils import MemberVariable, DefinitionError, BUILTIN_TYPES, DataModel class MemberParser: diff --git a/python/podio/root_io.py b/python/podio/root_io.py new file mode 100644 index 000000000..2a37906aa --- /dev/null +++ b/python/podio/root_io.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +"""Python module for reading root files containing podio Frames""" + +from podio.base_reader import BaseReaderMixin + +from ROOT import gSystem +gSystem.Load('libpodioRootIO') # noqa: E402 +from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position + +Writer = podio.ROOTFrameWriter + + +class Reader(BaseReaderMixin): + """Reader class for reading podio root files.""" + + def __init__(self, filenames): + """Create a reader that reads from the passed file(s). 
+ + Args: + filenames (str or list[str]): file(s) to open and read data from + """ + if isinstance(filenames, str): + filenames = (filenames,) + + self._reader = podio.ROOTFrameReader() + self._reader.openFiles(filenames) + + super().__init__() diff --git a/python/podio/sio_io.py b/python/podio/sio_io.py new file mode 100644 index 000000000..24ce24df3 --- /dev/null +++ b/python/podio/sio_io.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +"""Python module for reading sio files containing podio Frames""" + +from podio.base_reader import BaseReaderMixin + +from ROOT import gSystem +gSystem.Load('libpodioSioIO') # noqa: 402 +from ROOT import podio # noqa: 402 # pylint: disable=wrong-import-position + +Writer = podio.SIOFrameWriter + + +class Reader(BaseReaderMixin): + """Reader class for readion podio SIO files.""" + + def __init__(self, filename): + """Create a reader that reads from the passed file. + + Args: + filename (str): File to open and read data from + """ + self._reader = podio.SIOFrameReader() + self._reader.openFile(filename) + + super().__init__() diff --git a/python/test_ClassDefinitionValidator.py b/python/podio/test_ClassDefinitionValidator.py similarity index 99% rename from python/test_ClassDefinitionValidator.py rename to python/podio/test_ClassDefinitionValidator.py index 5ddad9488..5db93c195 100644 --- a/python/test_ClassDefinitionValidator.py +++ b/python/podio/test_ClassDefinitionValidator.py @@ -8,8 +8,8 @@ import unittest from copy import deepcopy -from podio_config_reader import ClassDefinitionValidator, MemberVariable, DefinitionError -from generator_utils import DataModel +from podio.podio_config_reader import ClassDefinitionValidator, MemberVariable, DefinitionError +from podio.generator_utils import DataModel def make_dm(components, datatypes, options=None): diff --git a/python/test_EventStore.py b/python/podio/test_EventStore.py similarity index 98% rename from python/test_EventStore.py rename to python/podio/test_EventStore.py index 
3202f4843..a8e4fb965 100644 --- a/python/test_EventStore.py +++ b/python/podio/test_EventStore.py @@ -1,6 +1,6 @@ """Unit tests for the EventStore class""" -from EventStore import EventStore +from podio.EventStore import EventStore class EventStoreBaseTestCaseMixin: diff --git a/python/test_EventStoreRoot.py b/python/podio/test_EventStoreRoot.py similarity index 94% rename from python/test_EventStoreRoot.py rename to python/podio/test_EventStoreRoot.py index 4acf74950..e9c334e4e 100644 --- a/python/test_EventStoreRoot.py +++ b/python/podio/test_EventStoreRoot.py @@ -6,8 +6,8 @@ from ROOT import TFile -from EventStore import EventStore -from test_EventStore import EventStoreBaseTestCaseMixin +from podio.EventStore import EventStore +from podio.test_EventStore import EventStoreBaseTestCaseMixin class EventStoreRootTestCase(EventStoreBaseTestCaseMixin, unittest.TestCase): diff --git a/python/test_EventStoreSio.py b/python/podio/test_EventStoreSio.py similarity index 86% rename from python/test_EventStoreSio.py rename to python/podio/test_EventStoreSio.py index 409039d95..1859fecf6 100644 --- a/python/test_EventStoreSio.py +++ b/python/podio/test_EventStoreSio.py @@ -4,11 +4,9 @@ import unittest import os -from EventStore import EventStore -from test_EventStore import EventStoreBaseTestCaseMixin - - -SKIP_SIO_TESTS = os.environ.get('SKIP_SIO_TESTS', '1') == '1' +from podio.EventStore import EventStore +from podio.test_EventStore import EventStoreBaseTestCaseMixin +from podio.test_utils import SKIP_SIO_TESTS @unittest.skipIf(SKIP_SIO_TESTS, "no SIO support") diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py new file mode 100644 index 000000000..1390e08ce --- /dev/null +++ b/python/podio/test_Frame.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +"""Unit tests for python bindings of podio::Frame""" + +import unittest + +from podio.frame import Frame +# using root_io as that should always be present regardless of which backends are built +from podio.root_io 
import Reader + +# The expected collections in each frame +EXPECTED_COLL_NAMES = { + 'arrays', 'WithVectorMember', 'info', 'fixedWidthInts', 'mcparticles', + 'moreMCs', 'mcParticleRefs', 'hits', 'hitRefs', 'clusters', 'refs', 'refs2', + 'OneRelation', 'userInts', 'userDoubles', 'WithNamespaceMember', + 'WithNamespaceRelation', 'WithNamespaceRelationCopy' + } +# The expected parameter names in each frame +EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData'} + + +class FrameTest(unittest.TestCase): + """General unittests for for python bindings of the Frame""" + def test_frame_invalid_access(self): + """Check that the advertised exceptions are raised on invalid access.""" + # Creat an empty Frame here + frame = Frame() + with self.assertRaises(KeyError): + _ = frame.get('NonExistantCollection') + + with self.assertRaises(KeyError): + _ = frame.get_parameter('NonExistantParameter') + + +class FrameReadTest(unittest.TestCase): + """Unit tests for the Frame python bindings for Frames read from file. + + NOTE: The assumption is that the Frame has been written by tests/write_frame.h + """ + def setUp(self): + """Open the file and read in the first frame internally. 
+ + Reading only one event/Frame of each category here as looping and other + basic checks are already handled by the Reader tests + """ + reader = Reader('example_frame.root') + self.event = reader.get('events')[0] + self.other_event = reader.get('other_events')[7] + + def test_frame_collections(self): + """Check that all expected collections are available.""" + self.assertEqual(set(self.event.collections), EXPECTED_COLL_NAMES) + self.assertEqual(set(self.other_event.collections), EXPECTED_COLL_NAMES) + + # Not going over all collections here, as that should all be covered by the + # c++ test cases; Simply picking a few and doing some basic tests + mc_particles = self.event.get('mcparticles') + self.assertEqual(mc_particles.getValueTypeName(), 'ExampleMC') + self.assertEqual(len(mc_particles), 10) + self.assertEqual(len(mc_particles[0].daughters()), 4) + + mc_particle_refs = self.event.get('mcParticleRefs') + self.assertTrue(mc_particle_refs.isSubsetCollection()) + self.assertEqual(len(mc_particle_refs), 10) + + fixed_w_ints = self.event.get('fixedWidthInts') + self.assertEqual(len(fixed_w_ints), 3) + # Python has no concept of fixed width integers... + max_vals = fixed_w_ints[0] + self.assertEqual(max_vals.fixedInteger64(), 2**63 - 1) + self.assertEqual(max_vals.fixedU64(), 2**64 - 1) + + def test_frame_parameters(self): + """Check that all expected parameters are available.""" + self.assertEqual(set(self.event.parameters), EXPECTED_PARAM_NAMES) + self.assertEqual(set(self.other_event.parameters), EXPECTED_PARAM_NAMES) + + self.assertEqual(self.event.get_parameter('anInt'), 42) + self.assertEqual(self.other_event.get_parameter('anInt'), 42 + 107) + + self.assertEqual(self.event.get_parameter('UserEventWeight'), 0) + self.assertEqual(self.other_event.get_parameter('UserEventWeight'), 100. 
* 107) + + self.assertEqual(self.event.get_parameter('UserEventName'), ' event_number_0') + self.assertEqual(self.other_event.get_parameter('UserEventName'), ' event_number_107') + + with self.assertRaises(ValueError): + # Parameter name is available with multiple types + _ = self.event.get_parameter('SomeVectorData') + + with self.assertRaises(ValueError): + # Parameter not available as float (only int and string) + _ = self.event.get_parameter('SomeVectorData', as_type='float') + + self.assertEqual(self.event.get_parameter('SomeVectorData', as_type='int'), [1, 2, 3, 4]) + self.assertEqual(self.event.get_parameter('SomeVectorData', as_type='str'), ["just", "some", "strings"]) diff --git a/python/test_MemberParser.py b/python/podio/test_MemberParser.py similarity index 99% rename from python/test_MemberParser.py rename to python/podio/test_MemberParser.py index 572c33cd0..c7e4ec2bb 100644 --- a/python/test_MemberParser.py +++ b/python/podio/test_MemberParser.py @@ -6,7 +6,7 @@ import unittest -from podio_config_reader import MemberParser, DefinitionError +from podio.podio_config_reader import MemberParser, DefinitionError class MemberParserTest(unittest.TestCase): diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py new file mode 100644 index 000000000..ff35ca0d4 --- /dev/null +++ b/python/podio/test_Reader.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +"""Unit tests for podio readers""" + + +class ReaderTestCaseMixin: + """Common unittests for readers. + + Inheriting actual test cases have to inhert from this and unittest.TestCase. + All test cases assume that the files are produced with the tests/write_frame.h + functionaltiy. 
The following members have to be setup and initialized by the + inheriting test cases: + - reader: a podio reader + """ + def test_categories(self): + """Make sure that the categories are as expected""" + reader_cats = self.reader.categories + self.assertEqual(len(reader_cats), 2) + + for cat in ('events', 'other_events'): + self.assertTrue(cat in reader_cats) + + def test_frame_iterator_valid_category(self): + """Check that the returned iterators returned by Reader.get behave as expected.""" + # NOTE: very basic iterator tests only, content tests are done elsewhere + frames = self.reader.get('other_events') + self.assertEqual(len(frames), 10) + + i = 0 + for frame in self.reader.get('events'): + # Rudimentary check here only to see whether we got the right frame + self.assertEqual(frame.get_parameter('UserEventName'), f' event_number_{i}') + i += 1 + self.assertEqual(i, 10) + + # Out of bound access should not work + with self.assertRaises(IndexError): + _ = frames[10] + with self.assertRaises(IndexError): + _ = frames[-11] + + # Again only rudimentary checks + frame = frames[7] + self.assertEqual(frame.get_parameter('UserEventName'), ' event_number_107') + # Valid negative indexing + frame = frames[-2] + self.assertEqual(frame.get_parameter('UserEventName'), ' event_number_108') + # jumping back again also works + frame = frames[3] + self.assertEqual(frame.get_parameter('UserEventName'), ' event_number_103') + + # Looping starts from where we left, i.e. 
here we have 6 frames left + i = 0 + for _ in frames: + i += 1 + self.assertEqual(i, 6) + + def test_frame_iterator_invalid_category(self): + """Make sure non existant Frames are handled gracefully""" + non_existant = self.reader.get('non-existant') + self.assertEqual(len(non_existant), 0) + + # Indexed access should obviously not work + with self.assertRaises(IndexError): + _ = non_existant[0] + + # Loops should never be entered + i = 0 + for _ in non_existant: + i += 1 + self.assertEqual(i, 0) diff --git a/python/podio/test_ReaderRoot.py b/python/podio/test_ReaderRoot.py new file mode 100644 index 000000000..5ad7d119b --- /dev/null +++ b/python/podio/test_ReaderRoot.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +"""Python unit tests for the ROOT backend (using Frames)""" + +import unittest + +from podio.root_io import Reader +from podio.test_Reader import ReaderTestCaseMixin + + +class RootReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): + """Test cases for root input files""" + def setUp(self): + """Setup the corresponding reader""" + self.reader = Reader('example_frame.root') diff --git a/python/podio/test_ReaderSio.py b/python/podio/test_ReaderSio.py new file mode 100644 index 000000000..72a8c0d0e --- /dev/null +++ b/python/podio/test_ReaderSio.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +"""Python unit tests for the SIO backend (using Frames)""" + +import unittest + +from podio.sio_io import Reader +from podio.test_Reader import ReaderTestCaseMixin +from podio.test_utils import SKIP_SIO_TESTS + + +@unittest.skipIf(SKIP_SIO_TESTS, "no SIO support") +class SioReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): + """Test cases for root input files""" + def setUp(self): + """Setup the corresponding reader""" + self.reader = Reader('example_frame.sio') diff --git a/python/podio/test_utils.py b/python/podio/test_utils.py new file mode 100644 index 000000000..2c5e282b6 --- /dev/null +++ b/python/podio/test_utils.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 
+"""Utilities for python unittests""" + +import os + +SKIP_SIO_TESTS = os.environ.get('SKIP_SIO_TESTS', '1') == '1' diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index a4b587d4e..42187e8b0 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -15,8 +15,8 @@ import jinja2 -from podio_config_reader import PodioConfigReader -from generator_utils import DataType, DefinitionError +from podio.podio_config_reader import PodioConfigReader +from podio.generator_utils import DataType, DefinitionError THIS_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates') diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 696831613..f8e320763 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,12 +1,71 @@ -# This is needed for older ROOTs which do not understand -# target usage requirements +# Helper function for adding two targets. A shared library and a corresponding +# ROOT dictionary that is necessary for e.g. python bindings. +# Arguments: +# libname The base name for the library (and target) +# headers The header files that should be passed to dictionary generation +# sources The source files for the shared libraries +# selection The selection xml passed to the dictionary generation +# +# The function creates the following targets +# The shared library. Also available under the podio:: alias +# This is not linked against anything and has include directories set +# to podio only. So some target_link_libraries are most likely to be +# done outside +# Dict The dictionary shared library. 
This is linked against podio::podio +# and the necessary ROOT libraries +# +# Additionally the following files are generated by root +# - DictDict.rootmap +# - Dict_rdict.pcm +# these files have to be installed to the same directory as the dictionary shared +# library +FUNCTION(PODIO_ADD_LIB_AND_DICT libname headers sources selection ) + # shared library + add_library(${libname} SHARED ${sources}) + add_library(podio::${libname} ALIAS ${libname}) + target_include_directories(${libname} PUBLIC + $ + $) -SET(sources + # dictionary + set(dictname ${libname}Dict) + add_library(${dictname} SHARED) + target_include_directories(${dictname} PUBLIC + $ + $) + target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio ROOT::Core ROOT::Tree) + PODIO_GENERATE_DICTIONARY(${dictname} ${headers} SELECTION ${selection} + OPTIONS --library ${CMAKE_SHARED_LIBRARY_PREFIX}${dictname}${CMAKE_SHARED_LIBRARY_SUFFIX} + ) + # prevent generating dictionary twice + set_target_properties(${dictname}-dictgen PROPERTIES EXCLUDE_FROM_ALL TRUE) + target_sources(${dictname} PRIVATE ${dictname}.cxx) +ENDFUNCTION() + + +# --- Core podio library and dictionary without I/O +SET(core_sources CollectionIDTable.cc GenericParameters.cc ASCIIWriter.cc EventStore.cc) +SET(core_headers + ${CMAKE_SOURCE_DIR}/include/podio/CollectionBase.h + ${CMAKE_SOURCE_DIR}/include/podio/CollectionIDTable.h + ${CMAKE_SOURCE_DIR}/include/podio/EventStore.h + ${CMAKE_SOURCE_DIR}/include/podio/ICollectionProvider.h + ${CMAKE_SOURCE_DIR}/include/podio/IReader.h + ${CMAKE_SOURCE_DIR}/include/podio/ObjectID.h + ${CMAKE_SOURCE_DIR}/include/podio/UserDataCollection.h + ${CMAKE_SOURCE_DIR}/include/podio/podioVersion.h + ) + +PODIO_ADD_LIB_AND_DICT(podio "${core_headers}" "${core_sources}" selection.xml) +target_compile_options(podio PRIVATE -pthread) + + +# --- Root I/O functionality and corresponding dictionary SET(root_sources rootUtils.h ROOTWriter.cc @@ -15,106 +74,81 @@ SET(root_sources ROOTFrameReader.cc ) 
-SET(sio_sources - SIOReader.cc - SIOWriter.cc - SIOBlockUserData.cc - SIOBlock.cc - SIOFrameWriter.cc - SIOFrameReader.cc - SIOFrameData.cc -) - -SET(python_sources - IOHelpers.cc - PythonEventStore.cc +SET(root_headers + ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameReader.h + ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameWriter.h ) -# Main Library, no external dependencies -add_library(podio SHARED ${sources}) -add_library(podio::podio ALIAS podio) -target_include_directories(podio PUBLIC - $ - $) -target_compile_options(podio PRIVATE -pthread) - -# Root dependency, mostly IO -add_library(podioRootIO SHARED ${root_sources}) -add_library(podio::podioRootIO ALIAS podioRootIO) +PODIO_ADD_LIB_AND_DICT(podioRootIO "${root_headers}" "${root_sources}" root_selection.xml) target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO ROOT::Tree) -target_include_directories(podioRootIO PUBLIC - $ - $) - -# Dict Library -add_library(podioDict SHARED) -add_library(podio::podioDict ALIAS podioDict) -target_include_directories(podioDict PUBLIC - $ - $) -target_link_libraries(podioDict PUBLIC podio::podio ROOT::Core ROOT::Tree) - -SET(headers - ${CMAKE_SOURCE_DIR}/include/podio/CollectionBase.h - ${CMAKE_SOURCE_DIR}/include/podio/CollectionIDTable.h - ${CMAKE_SOURCE_DIR}/include/podio/EventStore.h - ${CMAKE_SOURCE_DIR}/include/podio/ICollectionProvider.h - ${CMAKE_SOURCE_DIR}/include/podio/IReader.h - ${CMAKE_SOURCE_DIR}/include/podio/ObjectID.h - ${CMAKE_SOURCE_DIR}/include/podio/UserDataCollection.h - ${CMAKE_SOURCE_DIR}/include/podio/podioVersion.h - ) -PODIO_GENERATE_DICTIONARY(podioDict ${headers} SELECTION selection.xml - OPTIONS --library ${CMAKE_SHARED_LIBRARY_PREFIX}podioDict${CMAKE_SHARED_LIBRARY_SUFFIX} - ) -# prevent generating dictionary twice -set_target_properties(podioDict-dictgen PROPERTIES EXCLUDE_FROM_ALL TRUE) -target_sources(podioDict PRIVATE podioDict.cxx) -add_library(podioPythonStore SHARED ${python_sources}) 
-target_link_libraries(podioPythonStore podio podioRootIO) -LIST(APPEND INSTALL_LIBRARIES podioPythonStore) +# --- Python EventStore for enabling (legacy) python bindings +SET(python_sources + IOHelpers.cc + PythonEventStore.cc + ) -add_library(podioPythonStoreDict SHARED) -target_include_directories(podioPythonStoreDict PUBLIC - $ - $ -) -target_link_libraries(podioPythonStoreDict PUBLIC podioPythonStore) SET(python_headers ${CMAKE_SOURCE_DIR}/include/podio/PythonEventStore.h ) -PODIO_GENERATE_DICTIONARY(podioPythonStoreDict ${python_headers} SELECTION python_selection.xml - OPTIONS --library ${CMAKE_SHARED_LIBRARY_PREFIX}podioPythonStoreDict${CMAKE_SHARED_MODULE_SUFFIX}) -set_target_properties(podioPythonStoreDict-dictgen PROPERTIES EXCLUDE_FROM_ALL TRUE) -target_sources(podioPythonStoreDict PRIVATE podioPythonStoreDict.cxx) +PODIO_ADD_LIB_AND_DICT(podioPythonStore "${python_headers}" "${python_sources}" python_selection.xml) +target_link_libraries(podioPythonStore PUBLIC podio::podio) +target_link_libraries(podioPythonStore PRIVATE podio::podioRootIO) -# SIO I/O library +# --- SIO I/O functionality and corresponding dictionary if(ENABLE_SIO) - add_library(podioSioIO SHARED ${sio_sources}) - add_library(podio::podioSioIO ALIAS podioSioIO) - - target_include_directories(podioSioIO PUBLIC - $ - $) + SET(sio_sources + SIOReader.cc + SIOWriter.cc + SIOBlockUserData.cc + SIOBlock.cc + SIOFrameWriter.cc + SIOFrameReader.cc + SIOFrameData.cc + ) + + SET(sio_headers + ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameReader.h + ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameWriter.h + ) + + PODIO_ADD_LIB_AND_DICT(podioSioIO "${sio_headers}" "${sio_sources}" sio_selection.xml) target_link_libraries(podioSioIO PUBLIC podio::podio SIO::sio ${CMAKE_DL_LIBS} ${PODIO_FS_LIBS}) - # also make the python EventStore understand SIO - target_link_libraries(podioPythonStore podioSioIO) + # Make sure the legacy python bindings know about the SIO backend + target_link_libraries(podioPythonStore 
PRIVATE podioSioIO) target_compile_definitions(podioPythonStore PRIVATE PODIO_ENABLE_SIO=1) - LIST(APPEND INSTALL_LIBRARIES podioSioIO) + LIST(APPEND INSTALL_LIBRARIES podioSioIO podioSioIODict) endif() -install(TARGETS podio podioDict podioPythonStoreDict podioRootIO ${INSTALL_LIBRARIES} +# --- Install everything +install(TARGETS podio podioDict podioPythonStore podioPythonStoreDict podioRootIO podioRootIODict ${INSTALL_LIBRARIES} EXPORT podioTargets DESTINATION "${CMAKE_INSTALL_LIBDIR}") -install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +# Only install the necessary headers +if (ENABLE_SIO) + install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +else() + install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + REGEX SIO.*\\.h$ EXCLUDE ) +endif() + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap ${CMAKE_CURRENT_BINARY_DIR}/libpodioDict_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/podioRootIODictDict.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/libpodioRootIODict_rdict.pcm ${CMAKE_CURRENT_BINARY_DIR}/podioPythonStoreDictDict.rootmap ${CMAKE_CURRENT_BINARY_DIR}/libpodioPythonStoreDict_rdict.pcm DESTINATION "${CMAKE_INSTALL_LIBDIR}") + +if (ENABLE_SIO) + install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/podioSioIODictDict.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/libpodioSioIODict_rdict.pcm + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) +endif() diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index ce761bd4d..93ac7d9e8 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -214,7 +214,7 @@ void ROOTFrameReader::openFiles(const std::vector& filenames) { // Do some work up front for setting up categories and setup all the chains // and record the available categories. 
The rest of the setup follows on // demand when the category is first read - m_availCategories = getAvailableCategories(m_metaChain.get()); + m_availCategories = ::podio::getAvailableCategories(m_metaChain.get()); for (const auto& cat : m_availCategories) { auto [it, _] = m_categories.try_emplace(cat, std::make_unique(cat.c_str())); for (const auto& fn : filenames) { @@ -231,6 +231,15 @@ unsigned ROOTFrameReader::getEntries(const std::string& name) const { return 0; } +std::vector ROOTFrameReader::getAvailableCategories() const { + std::vector cats; + cats.reserve(m_categories.size()); + for (const auto& [cat, _] : m_categories) { + cats.emplace_back(cat); + } + return cats; +} + std::tuple, std::vector>> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, const std::vector& collInfo) { diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index c521b694c..55fa62871 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -222,6 +222,16 @@ SIOFileTOCRecord::PositionType SIOFileTOCRecord::getPosition(const std::string& return 0; } +std::vector SIOFileTOCRecord::getRecordNames() const { + std::vector cats; + cats.reserve(m_recordMap.size()); + for (const auto& [cat, _] : m_recordMap) { + cats.emplace_back(cat); + } + + return cats; +} + void SIOFileTOCRecordBlock::read(sio::read_device& device, sio::version_type) { int size; device.data(size); diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc index 0ad6d281e..7b87d31c3 100644 --- a/src/SIOFrameReader.cc +++ b/src/SIOFrameReader.cc @@ -73,6 +73,10 @@ std::unique_ptr SIOFrameReader::readEntry(const std::string& name, return readNextEntry(name); } +std::vector SIOFrameReader::getAvailableCategories() const { + return m_tocRecord.getRecordNames(); +} + unsigned SIOFrameReader::getEntries(const std::string& name) const { return m_tocRecord.getNRecords(name); } diff --git a/src/root_selection.xml b/src/root_selection.xml new file mode 100644 index 000000000..41fd14a8a --- /dev/null +++ 
b/src/root_selection.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/sio_selection.xml b/src/sio_selection.xml new file mode 100644 index 000000000..78d43d3ca --- /dev/null +++ b/src/sio_selection.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b67d80585..c82ba3092 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -164,15 +164,18 @@ if (TARGET read_sio) set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) endif() -add_test( NAME pyunittest COMMAND python -m unittest discover -s ${CMAKE_SOURCE_DIR}/python) +add_test( NAME pyunittest COMMAND python -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) set_property(TEST pyunittest PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} - ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel + ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} SKIP_SIO_TESTS=$> ) -set_property(TEST pyunittest PROPERTY DEPENDS write) +set_property(TEST pyunittest PROPERTY DEPENDS write write_frame_root) +if (TARGET write_sio) + set_property(TEST pyunittest PROPERTY DEPENDS write_sio write_frame_sio) +endif() # Customize CTest to potentially disable some of the tests with known problems configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake) diff --git a/tests/read.py b/tests/read.py index 63026b957..3428b3c60 100644 --- a/tests/read.py +++ b/tests/read.py @@ -4,7 +4,7 @@ from __future__ import absolute_import, unicode_literals, print_function -from EventStore import EventStore +from podio.EventStore import EventStore if __name__ == '__main__': diff --git a/tests/write_frame.h b/tests/write_frame.h index f7ccdc267..e72ae6b50 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -366,6 +366,7 @@ podio::Frame makeFrame(int iFrame) { 
frame.putParameter("UserEventWeight", 100.f * iFrame); frame.putParameter("UserEventName", " event_number_" + std::to_string(iFrame)); frame.putParameter("SomeVectorData", {1, 2, 3, 4}); + frame.putParameter("SomeVectorData", {"just", "some", "strings"}); return frame; } diff --git a/tools/podio-dump b/tools/podio-dump index ab4efbaf9..fcae97b1f 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """podio-dump tool to dump contents of podio files""" -from EventStore import EventStore +from podio.EventStore import EventStore def dump_evt_overview(event, ievt): From dd1576a11f9ea3d216bb9e8418bc1fec41b6d774 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 11 Nov 2022 18:04:30 +0100 Subject: [PATCH 015/100] Add a Frame reader for legacy files (#345) * Lift an existing root and sio legacy frame reader from git * Add python bindings for legacy readers --- include/podio/ROOTLegacyReader.h | 125 +++++++++++++++++ include/podio/SIOLegacyReader.h | 95 +++++++++++++ python/podio/root_io.py | 22 +++ python/podio/sio_io.py | 19 +++ python/podio/test_Reader.py | 33 +++++ python/podio/test_ReaderRoot.py | 11 +- python/podio/test_ReaderSio.py | 12 +- src/CMakeLists.txt | 5 + src/ROOTLegacyReader.cc | 233 +++++++++++++++++++++++++++++++ src/SIOFrameReader.cc | 24 +--- src/SIOLegacyReader.cc | 132 +++++++++++++++++ src/root_selection.xml | 1 + src/sioUtils.h | 33 +++++ src/sio_selection.xml | 1 + tests/CMakeLists.txt | 8 +- tests/CTestCustom.cmake | 2 + tests/read_frame_legacy_root.cpp | 43 ++++++ tests/read_frame_legacy_sio.cpp | 43 ++++++ 18 files changed, 814 insertions(+), 28 deletions(-) create mode 100644 include/podio/ROOTLegacyReader.h create mode 100644 include/podio/SIOLegacyReader.h create mode 100644 src/ROOTLegacyReader.cc create mode 100644 src/SIOLegacyReader.cc create mode 100644 src/sioUtils.h create mode 100644 tests/read_frame_legacy_root.cpp create mode 100644 tests/read_frame_legacy_sio.cpp diff --git 
a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h new file mode 100644 index 000000000..06f9c015b --- /dev/null +++ b/include/podio/ROOTLegacyReader.h @@ -0,0 +1,125 @@ +#ifndef PODIO_ROOTLEGACYREADER_H +#define PODIO_ROOTLEGACYREADER_H + +#include "podio/CollectionBranches.h" +#include "podio/ROOTFrameData.h" +#include "podio/podioVersion.h" + +#include "TChain.h" + +#include +#include +#include +#include +#include +#include + +// forward declarations +class TClass; +// class TChain; +class TFile; +class TTree; + +namespace podio { + +namespace detail { + // Information about the data vector as well as the collection class type + // and the index in the collection branches cache vector + using CollectionInfo = std::tuple; + +} // namespace detail + +class EventStore; +class CollectionBase; +class CollectionIDTable; +class GenericParameters; +struct CollectionReadBuffers; + +/** + * A root reader for reading legacy podio root files that have been written + * using the legacy, non Frame based I/O model. This reader grants Frame based + * access to those files, by mimicking the Frame I/O functionality and the + * interfaces of those readers. + * + * NOTE: Since there was only one category ("events") for those legacy podio + * files this reader will really only work if you try to read that category, and + * will simply return no data if you try to read anything else. + */ +class ROOTLegacyReader { + +public: + ROOTLegacyReader() = default; + ~ROOTLegacyReader() = default; + + // non-copyable + ROOTLegacyReader(const ROOTLegacyReader&) = delete; + ROOTLegacyReader& operator=(const ROOTLegacyReader&) = delete; + + void openFile(const std::string& filename); + + void openFiles(const std::vector& filenames); + + /** + * Read the next data entry from which a Frame can be constructed. In case + * there are no more entries left, this returns a nullptr.
+ * + * NOTE: the category name has to be "events" in this case, as only that + * category is available for legacy files. + */ + std::unique_ptr readNextEntry(const std::string&); + + /** + * Read the specified data entry from which a Frame can be constructed. In case + * the entry does not exist, this returns a nullptr. + * + * NOTE: the category name has to be "events" in this case, as only that + * category is available for legacy files. + */ + std::unique_ptr readEntry(const std::string&, const unsigned entry); + + /// Returns number of entries for a given category + unsigned getEntries(const std::string&) const; + + /// Get the build version of podio that has been used to write the current file + podio::version::Version currentFileVersion() const { + return m_fileVersion; + } + + /// Get the names of all the available Frame categories in the current file(s) + std::vector getAvailableCategories() const; + +private: + std::pair getLocalTreeAndEntry(const std::string& treename); + + void createCollectionBranches(const std::vector>& collInfo); + + podio::GenericParameters readEventMetaData(); + + podio::CollectionReadBuffers getCollectionBuffers(const std::pair& collInfo); + + std::unique_ptr readEntry(); + + // cache the necessary information to more quickly construct and read each + // collection after it has been read the very first time + std::vector> m_storedClasses{}; + + std::shared_ptr m_table{nullptr}; + std::unique_ptr m_chain{nullptr}; + unsigned m_eventNumber{0}; + + // Similar to writing we cache the branches that belong to each collection + // in order not to have to look them up for every event. However, for the + // reader we cannot guarantee a fixed order of collections as they are read + // on demand. Hence, we give each collection an index the first time it is + // read and we start caching the branches.
+ std::vector m_collectionBranches{}; + + podio::version::Version m_fileVersion{0, 0, 0}; + + /// The **only** category name that is available from legacy files + constexpr static auto m_categoryName = "events"; +}; + +} // namespace podio + +#endif // PODIO_ROOTLEGACYREADER_H diff --git a/include/podio/SIOLegacyReader.h b/include/podio/SIOLegacyReader.h new file mode 100644 index 000000000..5263bfc49 --- /dev/null +++ b/include/podio/SIOLegacyReader.h @@ -0,0 +1,95 @@ +#ifndef PODIO_SIOLEGACYREADER_H +#define PODIO_SIOLEGACYREADER_H + +#include "podio/SIOBlock.h" +#include "podio/SIOFrameData.h" +#include "podio/podioVersion.h" + +#include + +#include +#include +#include + +namespace podio { + +class CollectionIDTable; + +/** + * A SIO reader for reading legacy podio .sio files that have been written using + * the legacy, non Frame based I/O model. This reader grants Frame based access + * to those files, by mimicking Frame I/O functionality and the interfaces of + * those readers. + * + * NOTE: Since there was only one category ("events") for those legacy podio + * files this reader will really only work if you try to read that category, and + * will simply return no data if you try to read anything else. + */ +class SIOLegacyReader { + +public: + SIOLegacyReader(); + ~SIOLegacyReader() = default; + + // non copy-able + SIOLegacyReader(const SIOLegacyReader&) = delete; + SIOLegacyReader& operator=(const SIOLegacyReader&) = delete; + + /** + * Read the next data entry from which a Frame can be constructed. In case + * there are no more entries left, this returns a nullptr. + * + * NOTE: the category name has to be "events" in this case, as only that + * category is available for legacy files. + */ + std::unique_ptr readNextEntry(const std::string&); + + /** + * Read the specified data entry from which a Frame can be constructed. In case + * the entry does not exist, this returns a nullptr.
+ * + * NOTE: the category name has to be "events" in this case, as only that + * category is available for legacy files. + */ + std::unique_ptr readEntry(const std::string&, const unsigned entry); + + /// Returns the number of entries for a given category + unsigned getEntries(const std::string& name) const; + + void openFile(const std::string& filename); + + /// Get the build version of podio that has been used to write the current file + podio::version::Version currentFileVersion() const { + return m_fileVersion; + } + + /// Get the names of all the available Frame categories in the current file(s) + std::vector getAvailableCategories() const; + +private: + /// read the TOC record + bool readFileTOCRecord(); + + void readCollectionIDTable(); + + sio::ifstream m_stream{}; + + // TODO: Move these somewhere else + std::vector m_typeNames{}; + std::vector m_subsetCollectionBits{}; + + sio::buffer m_tableBuffer{1}; ///< The buffer holding the **compressed** CollectionIDTable + unsigned m_tableUncLength{0}; ///< The uncompressed length of the tableBuffer + + std::shared_ptr m_table{nullptr}; + unsigned m_eventNumber{0}; + + SIOFileTOCRecord m_tocRecord{}; + podio::version::Version m_fileVersion{0}; + + constexpr static auto m_categoryName = "events"; +}; + +} // namespace podio + +#endif // PODIO_SIOLEGACYREADER_H diff --git a/python/podio/root_io.py b/python/podio/root_io.py index 2a37906aa..fd536a013 100644 --- a/python/podio/root_io.py +++ b/python/podio/root_io.py @@ -26,3 +26,25 @@ def __init__(self, filenames): self._reader.openFiles(filenames) super().__init__() + + +class LegacyReader(BaseReaderMixin): + """Reader class for reading legacy podio root files. + + This reader can be used to read files that have not yet been written using + Frame based I/O into Frames for a more seamless transition. + """ + + def __init__(self, filenames): + """Create a reader that reads from the passed file(s).
+ + Args: + filenames (str or list[str]): file(s) to open and read data from + """ + if isinstance(filenames, str): + filenames = (filenames,) + + self._reader = podio.ROOTLegacyReader() + self._reader.openFiles(filenames) + + super().__init__() diff --git a/python/podio/sio_io.py b/python/podio/sio_io.py index 24ce24df3..4c9ec1ab2 100644 --- a/python/podio/sio_io.py +++ b/python/podio/sio_io.py @@ -23,3 +23,22 @@ def __init__(self, filename): self._reader.openFile(filename) super().__init__() + + +class LegacyReader(BaseReaderMixin): + """Reader class for reading legacy podio sio files. + + This reader can be used to read files that have not yet been written using the + Frame based I/O into Frames for a more seamless transition. + """ + + def __init__(self, filename): + """Create a reader that reads from the passed file. + + Args: + filename (str): File to open and read data from + """ + self._reader = podio.SIOLegacyReader() + self._reader.openFile(filename) + + super().__init__() diff --git a/python/podio/test_Reader.py b/python/podio/test_Reader.py index ff35ca0d4..eca8552c8 100644 --- a/python/podio/test_Reader.py +++ b/python/podio/test_Reader.py @@ -68,3 +68,36 @@ def test_frame_iterator_invalid_category(self): for _ in non_existant: i += 1 self.assertEqual(i, 0) + + +class LegacyReaderTestCaseMixin: + """Common test cases for the legacy readers python bindings. + + These tests assume that input files are produced with the write_test.h header + and that inheriting test cases inherit from unittest.TestCase as well. + Additionally they have to have an initialized reader as a member.
+ + NOTE: Since the legacy readers also use the BaseReaderMixin, many of the + invalid access test cases are already covered by the ReaderTestCaseMixin and + here we simply focus on the slightly different happy paths + """ + def test_categories(self): + """Make sure the legacy reader returns only one category""" + cats = self.reader.categories + self.assertEqual(("events",), cats) + + def test_frame_iterator(self): + """Make sure the FrameIterator works.""" + frames = self.reader.get('events') + self.assertEqual(len(frames), 2000) + + for i, frame in enumerate(frames): + # Rudimentary check here only to see whether we got the right frame + self.assertEqual(frame.get_parameter('UserEventName'), f' event_number_{i}') + # Only check a few Frames here + if i > 10: + break + + # Index based access + frame = frames[123] + self.assertEqual(frame.get_parameter('UserEventName'), ' event_number_123') diff --git a/python/podio/test_ReaderRoot.py b/python/podio/test_ReaderRoot.py index 5ad7d119b..ad7c8000a 100644 --- a/python/podio/test_ReaderRoot.py +++ b/python/podio/test_ReaderRoot.py @@ -3,8 +3,8 @@ import unittest -from podio.root_io import Reader -from podio.test_Reader import ReaderTestCaseMixin +from podio.root_io import Reader, LegacyReader +from podio.test_Reader import ReaderTestCaseMixin, LegacyReaderTestCaseMixin class RootReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): @@ -12,3 +12,10 @@ class RootReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): def setUp(self): """Setup the corresponding reader""" self.reader = Reader('example_frame.root') + + +class RootLegacyReaderTestCase(LegacyReaderTestCaseMixin, unittest.TestCase): + """Test cases for the legacy root input files and reader.""" + def setUp(self): + """Setup a reader, reading from the example files""" + self.reader = LegacyReader('example.root') diff --git a/python/podio/test_ReaderSio.py b/python/podio/test_ReaderSio.py index 72a8c0d0e..3d1f35eae 100644 --- a/python/podio/test_ReaderSio.py 
+++ b/python/podio/test_ReaderSio.py @@ -3,8 +3,8 @@ import unittest -from podio.sio_io import Reader -from podio.test_Reader import ReaderTestCaseMixin +from podio.sio_io import Reader, LegacyReader +from podio.test_Reader import ReaderTestCaseMixin, LegacyReaderTestCaseMixin from podio.test_utils import SKIP_SIO_TESTS @@ -14,3 +14,11 @@ class SioReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): def setUp(self): """Setup the corresponding reader""" self.reader = Reader('example_frame.sio') + + +@unittest.skipIf(SKIP_SIO_TESTS, "no SIO support") +class SIOLegacyReaderTestCase(LegacyReaderTestCaseMixin, unittest.TestCase): + """Test cases for the legacy sio input files and reader.""" + def setUp(self): + """Setup a reader, reading from the example files""" + self.reader = LegacyReader('example.sio') diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f8e320763..d0017cba7 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,10 +72,12 @@ SET(root_sources ROOTReader.cc ROOTFrameWriter.cc ROOTFrameReader.cc + ROOTLegacyReader.cc ) SET(root_headers ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameReader.h + ${CMAKE_SOURCE_DIR}/include/podio/ROOTLegacyReader.h ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameWriter.h ) @@ -106,10 +108,13 @@ if(ENABLE_SIO) SIOFrameWriter.cc SIOFrameReader.cc SIOFrameData.cc + sioUtils.h + SIOLegacyReader.cc ) SET(sio_headers ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameReader.h + ${CMAKE_SOURCE_DIR}/include/podio/SIOLegacyReader.h ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameWriter.h ) diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc new file mode 100644 index 000000000..ac718d14a --- /dev/null +++ b/src/ROOTLegacyReader.cc @@ -0,0 +1,233 @@ +#include "podio/CollectionBuffers.h" +#include "podio/ROOTFrameData.h" +#include "rootUtils.h" + +// podio specific includes +#include "podio/CollectionBase.h" +#include "podio/CollectionIDTable.h" +#include "podio/GenericParameters.h" +#include "podio/ROOTLegacyReader.h" + +//
ROOT specific includes +#include "TChain.h" +#include "TClass.h" +#include "TFile.h" +#include "TTree.h" +#include "TTreeCache.h" + +#include + +namespace podio { + +std::unique_ptr ROOTLegacyReader::readNextEntry(const std::string& name) { + if (name != m_categoryName) { + return nullptr; + } + return readEntry(); +} + +std::unique_ptr ROOTLegacyReader::readEntry(const std::string& name, unsigned entry) { + if (name != m_categoryName) { + return nullptr; + } + m_eventNumber = entry; + return readEntry(); +} + +std::unique_ptr ROOTLegacyReader::readEntry() { + ROOTFrameData::BufferMap buffers; + for (const auto& collInfo : m_storedClasses) { + buffers.emplace(collInfo.first, getCollectionBuffers(collInfo)); + } + + auto parameters = readEventMetaData(); + + m_eventNumber++; + return std::make_unique(std::move(buffers), m_table, std::move(parameters)); +} + +podio::CollectionReadBuffers +ROOTLegacyReader::getCollectionBuffers(const std::pair& collInfo) { + const auto& name = collInfo.first; + const auto& [theClass, collectionClass, index] = collInfo.second; + auto& branches = m_collectionBranches[index]; + + // Create empty collection buffers, and connect them to the right branches + auto collBuffers = podio::CollectionReadBuffers(); + // If we have a valid data buffer class we know that have to read data, + // otherwise we are handling a subset collection + const bool isSubsetColl = theClass == nullptr; + if (!isSubsetColl) { + collBuffers.data = theClass->New(); + } + + { + auto collection = + std::unique_ptr(static_cast(collectionClass->New())); + collection->setSubsetCollection(isSubsetColl); + + auto tmpBuffers = collection->createBuffers(); + collBuffers.createCollection = std::move(tmpBuffers.createCollection); + collBuffers.recast = std::move(tmpBuffers.recast); + + if (auto* refs = tmpBuffers.references) { + collBuffers.references = new podio::CollRefCollection(refs->size()); + } + if (auto* vminfo = tmpBuffers.vectorMembers) { + collBuffers.vectorMembers = 
new podio::VectorMembersInfo(); + collBuffers.vectorMembers->reserve(vminfo->size()); + + for (const auto& [type, _] : (*vminfo)) { + const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); + collBuffers.vectorMembers->emplace_back(type, vecClass->New()); + } + } + } + + const auto localEntry = m_chain->LoadTree(m_eventNumber); + // After switching trees in the chain, branch pointers get invalidated so + // they need to be reassigned. + // NOTE: root 6.22/06 requires that we get completely new branches here, + // with 6.20/04 we could just re-set them + if (localEntry == 0) { + branches.data = root_utils::getBranch(m_chain.get(), name.c_str()); + + // reference collections + if (auto* refCollections = collBuffers.references) { + for (size_t i = 0; i < refCollections->size(); ++i) { + const auto brName = root_utils::refBranch(name, i); + branches.refs[i] = root_utils::getBranch(m_chain.get(), brName.c_str()); + } + } + + // vector members + if (auto* vecMembers = collBuffers.vectorMembers) { + for (size_t i = 0; i < vecMembers->size(); ++i) { + const auto brName = root_utils::vecBranch(name, i); + branches.vecs[i] = root_utils::getBranch(m_chain.get(), brName.c_str()); + } + } + } + + // set the addresses and read the data + root_utils::setCollectionAddresses(collBuffers, branches); + root_utils::readBranchesData(branches, localEntry); + + collBuffers.recast(collBuffers); + + return collBuffers; +} + +podio::GenericParameters ROOTLegacyReader::readEventMetaData() { + GenericParameters params; + auto [tree, entry] = getLocalTreeAndEntry("evt_metadata"); + auto* branch = root_utils::getBranch(tree, "evtMD"); + auto* emd = &params; + branch->SetAddress(&emd); + branch->GetEntry(entry); + return params; +} + +void ROOTLegacyReader::openFile(const std::string& filename) { + openFiles({filename}); +} + +void ROOTLegacyReader::openFiles(const std::vector& filenames) { + m_chain = std::make_unique("events"); + for (const auto& filename : filenames) { +
m_chain->Add(filename.c_str()); + } + + // read the meta data and build the collectionBranches cache + // NOTE: This is a small pessimization, if we do not read all collections + // afterwards, but it makes the handling much easier in general + auto metadatatree = static_cast(m_chain->GetFile()->Get("metadata")); + m_table = std::make_shared(); + auto* table = m_table.get(); + metadatatree->SetBranchAddress("CollectionIDs", &table); + + podio::version::Version* versionPtr{nullptr}; + if (auto* versionBranch = root_utils::getBranch(metadatatree, "PodioVersion")) { + versionBranch->SetAddress(&versionPtr); + } + + // Check if the CollectionTypeInfo branch is there and assume that the file + // has been written with podio pre #197 (<0.13.1) if that is not the case + if (auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo")) { + auto collectionInfo = new std::vector; + collInfoBranch->SetAddress(&collectionInfo); + metadatatree->GetEntry(0); + createCollectionBranches(*collectionInfo); + delete collectionInfo; + } else { + std::cout << "PODIO: Reconstructing CollectionTypeInfo branch from other sources in file: \'" + << m_chain->GetFile()->GetName() << "\'" << std::endl; + metadatatree->GetEntry(0); + const auto collectionInfo = root_utils::reconstructCollectionInfo(m_chain.get(), *m_table); + createCollectionBranches(collectionInfo); + } + + m_fileVersion = versionPtr ?
*versionPtr : podio::version::Version{0, 0, 0}; + delete versionPtr; +} + +unsigned ROOTLegacyReader::getEntries(const std::string& name) const { + if (name != m_categoryName) { + return 0; + } + return m_chain->GetEntries(); +} + +void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { + size_t collectionIndex{0}; + + for (const auto& [collID, collType, isSubsetColl] : collInfo) { + // We only write collections that are in the collectionIDTable, so no need + // to check here + const auto name = m_table->name(collID); + + root_utils::CollectionBranches branches{}; + const auto collectionClass = TClass::GetClass(collType.c_str()); + + // Need the collection here to setup all the branches. Have to manage the + // temporary collection ourselves + auto collection = + std::unique_ptr(static_cast(collectionClass->New())); + collection->setSubsetCollection(isSubsetColl); + + if (!isSubsetColl) { + // This branch is guaranteed to exist since only collections that are + // also written to file are in the info metadata that we work with here + branches.data = root_utils::getBranch(m_chain.get(), name.c_str()); + } + + const auto buffers = collection->getBuffers(); + for (size_t i = 0; i < buffers.references->size(); ++i) { + const auto brName = root_utils::refBranch(name, i); + branches.refs.push_back(root_utils::getBranch(m_chain.get(), brName.c_str())); + } + + for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) { + const auto brName = root_utils::vecBranch(name, i); + branches.vecs.push_back(root_utils::getBranch(m_chain.get(), brName.c_str())); + } + + const std::string bufferClassName = "std::vector<" + collection->getDataTypeName() + ">"; + const auto bufferClass = isSubsetColl ? 
nullptr : TClass::GetClass(bufferClassName.c_str()); + + m_storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); + m_collectionBranches.push_back(branches); + } +} + +std::pair ROOTLegacyReader::getLocalTreeAndEntry(const std::string& treename) { + auto localEntry = m_chain->LoadTree(m_eventNumber); + auto* tree = static_cast(m_chain->GetFile()->Get(treename.c_str())); + return {tree, localEntry}; +} + +std::vector ROOTLegacyReader::getAvailableCategories() const { + return {m_categoryName}; +} + +} // namespace podio diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc index 7b87d31c3..d987d6a74 100644 --- a/src/SIOFrameReader.cc +++ b/src/SIOFrameReader.cc @@ -1,35 +1,15 @@ #include "podio/SIOFrameReader.h" #include "podio/SIOBlock.h" +#include "sioUtils.h" + #include -#include #include #include namespace podio { -namespace sio_utils { - // Read the record into a buffer and potentially uncompress it - std::pair readRecord(sio::ifstream& stream, bool decompress = true, - std::size_t initBufferSize = sio::mbyte) { - sio::record_info recInfo; - sio::buffer infoBuffer{sio::max_record_info_len}; - sio::buffer recBuffer{initBufferSize}; - sio::api::read_record_info(stream, recInfo, infoBuffer); - sio::api::read_record_data(stream, recInfo, recBuffer); - - if (decompress) { - sio::buffer uncBuffer{recInfo._uncompressed_length}; - sio::zlib_compression compressor; - compressor.uncompress(recBuffer.span(), uncBuffer); - return std::make_pair(std::move(uncBuffer), recInfo); - } - - return std::make_pair(std::move(recBuffer), recInfo); - } -} // namespace sio_utils - SIOFrameReader::SIOFrameReader() { auto& libLoader [[maybe_unused]] = SIOBlockLibraryLoader::instance(); } diff --git a/src/SIOLegacyReader.cc b/src/SIOLegacyReader.cc new file mode 100644 index 000000000..d5329ca5d --- /dev/null +++ b/src/SIOLegacyReader.cc @@ -0,0 +1,132 @@ +#include "podio/SIOLegacyReader.h" +#include "podio/SIOBlock.h" + +#include 
"sioUtils.h" + +#include +#include +#include + +namespace podio { + +SIOLegacyReader::SIOLegacyReader() { + auto& libLoader [[maybe_unused]] = SIOBlockLibraryLoader::instance(); +} + +void SIOLegacyReader::openFile(const std::string& filename) { + m_stream.open(filename, std::ios::binary); + if (!m_stream.is_open()) { + SIO_THROW(sio::error_code::not_open, "Cannot open input file '" + filename + "' for reading"); + } + + // NOTE: reading TOC record first because that jumps back to the start of the file! + readFileTOCRecord(); + readCollectionIDTable(); +} + +std::unique_ptr SIOLegacyReader::readNextEntry(const std::string& name) { + if (name != m_categoryName) { + return nullptr; + } + // skip possible intermediate records that are not event data + try { + sio::api::go_to_record(m_stream, "event_record"); + } catch (sio::exception&) { + // If anything goes wrong, return a nullptr + return nullptr; + } + + auto [dataBuffer, dataInfo] = sio_utils::readRecord(m_stream, false); + // Need to work around the fact that sio::buffers are not copyable by copying + // the underlying buffer (vector) and then using that to move construct + // a new buffer + sio::buffer::container bufferBytes{m_tableBuffer.data(), m_tableBuffer.data() + m_tableBuffer.size()}; + auto tableBuffer = sio::buffer(std::move(bufferBytes)); + + m_eventNumber++; + return std::make_unique(std::move(dataBuffer), dataInfo._uncompressed_length, std::move(tableBuffer), + m_tableUncLength); +} + +std::unique_ptr SIOLegacyReader::readEntry(const std::string& name, const unsigned entry) { + if (name != m_categoryName) { + return nullptr; + } + + // Setting the event number to the desired one here and putting the stream to + // the right position is the necessary setup before simply handing off to readNextEntry + m_eventNumber = entry; + // NOTE: In legacy files the "events" are stored in "event_record" records + const auto recordPos = m_tocRecord.getPosition("event_record", entry); + if (recordPos == 0) { + 
return nullptr; + } + m_stream.seekg(recordPos); + + return readNextEntry(name); +} + +unsigned SIOLegacyReader::getEntries(const std::string& name) const { + if (name != "events") { + return 0; + } + return m_tocRecord.getNRecords("event_record"); +} + +void SIOLegacyReader::readCollectionIDTable() { + // Need to decompress the buffers here, because in this record not only the + // collectionID table is stored, but also the version information... + auto [infoBuffer, _] = sio_utils::readRecord(m_stream, true); + + sio::block_list blocks; + blocks.emplace_back(std::make_shared()); + blocks.emplace_back(std::make_shared()); + sio::api::read_blocks(infoBuffer.span(), blocks); + + m_fileVersion = static_cast(blocks[1].get())->version; + + // recompress the collection ID table block... + blocks.resize(1); // remove the SIOVersionBlock + auto tmpUncBuffer = sio::buffer{sio::mbyte}; + auto tmpRecInfo = sio::api::write_record("dummy", tmpUncBuffer, blocks, 0); + sio::zlib_compression compressor; + compressor.set_level(6); + sio::api::compress_record(tmpRecInfo, tmpUncBuffer, m_tableBuffer, compressor); + m_tableUncLength = tmpRecInfo._uncompressed_length; +} + +bool SIOLegacyReader::readFileTOCRecord() { + // Check if there is a dedicated marker at the end of the file that tells us + // where the TOC actually starts + m_stream.seekg(-sio_helpers::SIOTocInfoSize, std::ios_base::end); + uint64_t firstWords{0}; + m_stream.read(reinterpret_cast(&firstWords), sizeof(firstWords)); + + const uint32_t marker = (firstWords >> 32) & 0xffffffff; + if (marker == sio_helpers::SIOTocMarker) { + const uint32_t position = firstWords & 0xffffffff; + m_stream.seekg(position); + + const auto& [uncBuffer, _] = sio_utils::readRecord(m_stream); + + sio::block_list blocks; + auto tocBlock = std::make_shared(); + tocBlock->record = &m_tocRecord; + blocks.push_back(tocBlock); + + sio::api::read_blocks(uncBuffer.span(), blocks); + + m_stream.seekg(0); + return true; + } + + m_stream.clear(); + 
m_stream.seekg(0); + return false; +} + +std::vector SIOLegacyReader::getAvailableCategories() const { + return {m_categoryName}; +} + +} // namespace podio diff --git a/src/root_selection.xml b/src/root_selection.xml index 41fd14a8a..886a69e68 100644 --- a/src/root_selection.xml +++ b/src/root_selection.xml @@ -1,6 +1,7 @@ + diff --git a/src/sioUtils.h b/src/sioUtils.h new file mode 100644 index 000000000..8401ac3cf --- /dev/null +++ b/src/sioUtils.h @@ -0,0 +1,33 @@ +#ifndef PODIO_SIO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy +#define PODIO_SIO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy + +#include +#include +#include + +#include + +namespace podio { +namespace sio_utils { + // Read the record into a buffer and potentially uncompress it + inline std::pair readRecord(sio::ifstream& stream, bool decompress = true, + std::size_t initBufferSize = sio::mbyte) { + sio::record_info recInfo; + sio::buffer infoBuffer{sio::max_record_info_len}; + sio::buffer recBuffer{initBufferSize}; + sio::api::read_record_info(stream, recInfo, infoBuffer); + sio::api::read_record_data(stream, recInfo, recBuffer); + + if (decompress) { + sio::buffer uncBuffer{recInfo._uncompressed_length}; + sio::zlib_compression compressor; + compressor.uncompress(recBuffer.span(), uncBuffer); + return std::make_pair(std::move(uncBuffer), recInfo); + } + + return std::make_pair(std::move(recBuffer), recInfo); + } +} // namespace sio_utils +} // namespace podio + +#endif diff --git a/src/sio_selection.xml b/src/sio_selection.xml index 78d43d3ca..b0101fd3e 100644 --- a/src/sio_selection.xml +++ b/src/sio_selection.xml @@ -1,6 +1,7 @@ + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c82ba3092..f32ce7b1b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,7 +45,8 @@ set(root_dependent_tests write_timed.cpp read_timed.cpp read_frame.cpp - write_frame_root.cpp) + write_frame_root.cpp + read_frame_legacy_root.cpp) 
set(root_libs TestDataModelDict podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") @@ -128,7 +129,8 @@ if (TARGET TestDataModelSioBlocks) write_timed_sio.cpp read_timed_sio.cpp read_frame_sio.cpp - write_frame_sio.cpp) + write_frame_sio.cpp + read_frame_legacy_sio.cpp) set(sio_libs podio::podioSioIO) foreach( sourcefile ${sio_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") @@ -145,6 +147,7 @@ endif() set_property(TEST read PROPERTY DEPENDS write) set_property(TEST read-multiple PROPERTY DEPENDS write) set_property(TEST read_and_write PROPERTY DEPENDS write) +set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) set_property(TEST read_frame PROPERTY DEPENDS write_frame_root) @@ -159,6 +162,7 @@ if (TARGET read_sio) set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) + set_property(TEST read_frame_legacy_sio PROPERTY DEPENDS write_sio) add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index c0c3b04f5..5eeb1d6b6 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -20,6 +20,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ check_benchmark_outputs read-multiple read-legacy-files + read_frame_legacy_root write_frame_root read_frame @@ -32,6 +33,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ check_benchmark_outputs_sio write_frame_sio read_frame_sio + read_frame_legacy_sio write_ascii diff --git a/tests/read_frame_legacy_root.cpp 
b/tests/read_frame_legacy_root.cpp new file mode 100644 index 000000000..9dfdfcc7a --- /dev/null +++ b/tests/read_frame_legacy_root.cpp @@ -0,0 +1,43 @@ +#include "read_test.h" + +#include "podio/Frame.h" +#include "podio/ROOTLegacyReader.h" + +#include + +int main() { + auto reader = podio::ROOTLegacyReader(); + reader.openFile("example.root"); + + if (reader.currentFileVersion() != podio::version::build_version) { + std::cerr << "The podio build version could not be read back correctly. " + << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" + << std::endl; + return 1; + } + + if (reader.getEntries("events") != 2000) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 2000 << ", actual: " << reader.getEntries("events") << ")" << std::endl; + return 1; + } + + for (size_t i = 0; i < reader.getEntries("events"); ++i) { + const auto frame = podio::Frame(reader.readNextEntry("events")); + processEvent(frame, i, reader.currentFileVersion()); + } + + // Reading specific entries + { + auto frame = podio::Frame(reader.readEntry("events", 4)); + processEvent(frame, 4, reader.currentFileVersion()); + + auto nextFrame = podio::Frame(reader.readNextEntry("events")); + processEvent(nextFrame, 5, reader.currentFileVersion()); + + auto previousFrame = podio::Frame(reader.readEntry("events", 2)); + processEvent(previousFrame, 2, reader.currentFileVersion()); + } + + return 0; +} diff --git a/tests/read_frame_legacy_sio.cpp b/tests/read_frame_legacy_sio.cpp new file mode 100644 index 000000000..a29a0bb18 --- /dev/null +++ b/tests/read_frame_legacy_sio.cpp @@ -0,0 +1,43 @@ +#include "read_test.h" + +#include "podio/Frame.h" +#include "podio/SIOLegacyReader.h" + +#include + +int main() { + auto reader = podio::SIOLegacyReader(); + reader.openFile("example.sio"); + + if (reader.currentFileVersion() != podio::version::build_version) { + std::cerr << "The podio build version could not be read 
back correctly. " + << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" + << std::endl; + return 1; + } + + if (reader.getEntries("events") != 2000) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 2000 << ", actual: " << reader.getEntries("events") << ")" << std::endl; + return 1; + } + + for (size_t i = 0; i < reader.getEntries("events"); ++i) { + const auto frame = podio::Frame(reader.readNextEntry("events")); + processEvent(frame, i, reader.currentFileVersion()); + } + + // Reading specific entries + { + auto frame = podio::Frame(reader.readEntry("events", 4)); + processEvent(frame, 4, reader.currentFileVersion()); + + auto nextFrame = podio::Frame(reader.readNextEntry("events")); + processEvent(nextFrame, 5, reader.currentFileVersion()); + + auto previousFrame = podio::Frame(reader.readEntry("events", 2)); + processEvent(previousFrame, 2, reader.currentFileVersion()); + } + + return 0; +} From 4a6f1043d8f1d5de0578eb5445bd7c21554d64cf Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 14 Nov 2022 10:03:05 +0100 Subject: [PATCH 016/100] Frame support for `podio-dump` (#344) * Add dumping functionality for Frames * Add a few basic tests for podio-dump --- CMakeLists.txt | 4 +- python/podio/base_reader.py | 13 ++++ python/podio/frame.py | 59 ++++++++++++++--- python/podio/reading.py | 46 +++++++++++++ python/podio/root_io.py | 1 + python/podio/sio_io.py | 1 + tests/CTestCustom.cmake | 11 ++++ tools/CMakeLists.txt | 44 +++++++++++++ tools/podio-dump | 126 ++++++++++++++++++++++-------------- 9 files changed, 245 insertions(+), 60 deletions(-) create mode 100644 python/podio/reading.py diff --git a/CMakeLists.txt b/CMakeLists.txt index c87330a8c..486b02351 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,12 +157,10 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE #--- project specific subdirectories ------------------------------------------- 
add_subdirectory(python) add_subdirectory(src) -add_subdirectory(tools) if(BUILD_TESTING) add_subdirectory(tests) endif() +add_subdirectory(tools) #--- add CMake infrastructure -------------------------------------------------- include(cmake/podioCreateConfig.cmake) - - diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index 78549a5a4..b45cfa3f1 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -21,6 +21,10 @@ def __init__(self): setup. """ self._categories = tuple(s.data() for s in self._reader.getAvailableCategories()) + if hasattr(self, '_is_legacy'): + self._is_legacy = getattr(self, '_is_legacy') + else: + self._is_legacy = False # by default assume we are not legacy @property def categories(self): @@ -42,3 +46,12 @@ def get(self, category): desired category """ return FrameCategoryIterator(self._reader, category) + + @property + def is_legacy(self): + """Whether this is a legacy file reader or not. + + Returns: + bool: True if this is a legacy file reader + """ + return self._is_legacy diff --git a/python/podio/frame.py b/python/podio/frame.py index 903ccf41d..0186d6801 100644 --- a/python/podio/frame.py +++ b/python/podio/frame.py @@ -43,17 +43,21 @@ def _determine_supported_parameter_types(lang): SUPPORTED_PARAMETER_PY_TYPES = _determine_supported_parameter_types('py') +# Map that is necessary for easier disambiguation of parameters that are +# available with more than one type under the same name. 
Maps a python type to +# a c++ vector of the corresponding type or a c++ type to the vector +_PY_TO_CPP_TYPE_MAP = { + pytype: f'std::vector<{cpptype}>' for (pytype, cpptype) in zip(SUPPORTED_PARAMETER_PY_TYPES, + SUPPORTED_PARAMETER_TYPES) + } +_PY_TO_CPP_TYPE_MAP.update({ + f'{cpptype}': f'std::vector<{cpptype}>' for cpptype in SUPPORTED_PARAMETER_TYPES + }) + + class Frame: """Frame class that serves as a container of collection and meta data.""" - # Map that is necessary for easier disambiguation of parameters that are - # available with more than one type under the same name. Maps a python type to - # a c++ vector of the corresponding type - _py_to_cpp_type_map = { - pytype: f'std::vector<{cpptype}>' for (pytype, cpptype) in zip(SUPPORTED_PARAMETER_PY_TYPES, - SUPPORTED_PARAMETER_TYPES) - } - def __init__(self, data=None): """Create a Frame. @@ -139,7 +143,7 @@ def _get_param_value(par_type, name): raise ValueError(f'{name} parameter has {len(par_type)} different types available, ' 'but no as_type argument to disambiguate') - req_type = self._py_to_cpp_type_map.get(as_type, None) + req_type = _PY_TO_CPP_TYPE_MAP.get(as_type, None) if req_type is None: raise ValueError(f'as_type value {as_type} cannot be mapped to a valid parameter type') @@ -148,6 +152,43 @@ def _get_param_value(par_type, name): return _get_param_value(req_type, name) + def get_parameters(self): + """Get the complete podio::GenericParameters object stored in this Frame. + + NOTE: This is mainly intended for dumping things, for actually obtaining + parameters please use get_parameter + + Returns: + podio.GenericParameters: The stored generic parameters + """ + # Going via the not entirely inteded way here + return self._frame.getGenericParametersForWrite() + + def get_param_info(self, name): + """Get the parameter type information stored under the given name. 
+ + Args: + name (str): The parameter name + + Returns: + dict (str: int): The c++-type(s) of the stored parameter and the number of + parameters + + Raise: + KeyError: If no parameter is stored under the given name + """ + # This raises the KeyError if the name is not present + par_types = [t.replace('std::vector<', '').replace('>', '') for t in self._param_key_types[name]] + # Assume that we have one parameter and update the dictionary below in case + # there are more + par_infos = {t: 1 for t in par_types} + for par_type in par_types: + par_value = self.get_parameter(name, as_type=par_type) + if isinstance(par_value, list): + par_infos[par_type] = len(par_value) + + return par_infos + def _init_param_keys(self): """Initialize the param keys dict for easier lookup of the available parameters. diff --git a/python/podio/reading.py b/python/podio/reading.py new file mode 100644 index 000000000..d9bfe1832 --- /dev/null +++ b/python/podio/reading.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Module for general reading functionality.""" + +from ROOT import TFile + +from podio import root_io +from podio import sio_io + + +def _is_frame_sio_file(filename): + """Peek into the sio file to determine whether this is a legacy file or not.""" + with open(filename, 'rb') as sio_file: + first_line = str(sio_file.readline()) + # The SIO Frame writer writes a podio_header_info at the beginning of the + # file + return first_line.find('podio_header_info') > 0 + + +def _is_frame_root_file(filename): + """Peek into the root file to determine whether this is a legacy file or not.""" + file = TFile.Open(filename) + # The ROOT Frame writer puts a podio_metadata TTree into the file + return bool(file.Get('podio_metadata')) + + +def get_reader(filename): + """Get an appropriate reader for the passed file. 
+ + Args: + filename (str): The input file + + Returns: + root_io.[Legacy]Reader, sio_io.[Legacy]Reader: an initialized reader + that is able to process the input file + """ + if filename.endswith('.sio'): + if _is_frame_sio_file(filename): + return sio_io.Reader(filename) + return sio_io.LegacyReader(filename) + + if filename.endswith('.root'): + if _is_frame_root_file(filename): + return root_io.Reader(filename) + return root_io.LegacyReader(filename) + + raise ValueError('file must end on .root or .sio') diff --git a/python/podio/root_io.py b/python/podio/root_io.py index fd536a013..4dc6f16a7 100644 --- a/python/podio/root_io.py +++ b/python/podio/root_io.py @@ -46,5 +46,6 @@ def __init__(self, filenames): self._reader = podio.ROOTLegacyReader() self._reader.openFiles(filenames) + self._is_legacy = True super().__init__() diff --git a/python/podio/sio_io.py b/python/podio/sio_io.py index 4c9ec1ab2..7559f16cb 100644 --- a/python/podio/sio_io.py +++ b/python/podio/sio_io.py @@ -40,5 +40,6 @@ def __init__(self, filename): """ self._reader = podio.SIOLegacyReader() self._reader.openFile(filename) + self._is_legacy = True super().__init__() diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 5eeb1d6b6..6d1e4e165 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -41,6 +41,17 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ relation_range pyunittest + + podio-dump-help + podio-dump-root-legacy + podio-dump-root + podio-dump-detailed-root + podio-dump-detailed-root-legacy + + podio-dump-sio-legacy + podio-dump-sio + podio-dump-detailed-sio + podio-dump-detailed-sio-legacy ) # ostream_operator is working with Memory sanitizer (at least locally) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index a3cfeb29e..c0765c28a 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1 +1,45 @@ install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) + +# Add 
a very basic tests here to make sure that podio-dump at least runs +if(BUILD_TESTING) + # Helper function for easily creating "tests" that simply execute podio-dump + # with different arguments. Not crashing is considered success. + # + # Args: + # name the name of the test + # depends_on the target name of the test that produces the required input file + function(CREATE_DUMP_TEST name depends_on) + add_test(NAME ${name} COMMAND ./podio-dump ${ARGN}) + + set(SIO_LD_PATH "") + if (ENABLE_SIO) + set(SIO_LD_PATH $) + endif() + + set_property(TEST ${name} + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:${SIO_LD_PATH}:$ENV{LD_LIBRARY_PATH} + PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} + ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} + ) + + set_tests_properties(${name} PROPERTIES + DEPENDS ${depends_on} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + ) + endfunction() + + CREATE_DUMP_TEST(podio-dump-help _dummy_target_ --help) + CREATE_DUMP_TEST(podio-dump-root-legacy "write" ${CMAKE_BINARY_DIR}/tests/example.root) + CREATE_DUMP_TEST(podio-dump-root "write_frame_root" ${CMAKE_BINARY_DIR}/tests/example_frame.root) + CREATE_DUMP_TEST(podio-dump-detailed-root "write_frame_root" --detailed --category other_events --entries 2:3 ${CMAKE_BINARY_DIR}/tests/example_frame.root) + CREATE_DUMP_TEST(podio-dump-detailed-root-legacy "write" --detailed --entries 2:3 ${CMAKE_BINARY_DIR}/tests/example.root) + + if (ENABLE_SIO) + CREATE_DUMP_TEST(podio-dump-sio-legacy "write_sio" ${CMAKE_BINARY_DIR}/tests/example.sio) + CREATE_DUMP_TEST(podio-dump-sio "write_frame_sio" --entries 4:7 ${CMAKE_BINARY_DIR}/tests/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio "write_frame_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/example.sio) 
+ endif() + +endif() diff --git a/tools/podio-dump b/tools/podio-dump index fcae97b1f..3fd3b366c 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -1,78 +1,106 @@ #!/usr/bin/env python3 """podio-dump tool to dump contents of podio files""" -from podio.EventStore import EventStore +import sys +from podio.reading import get_reader -def dump_evt_overview(event, ievt): - """Print an overview table of the event contents of the given event""" - print('{:#^82}'.format(f' Event {ievt} ')) # pylint: disable=consider-using-f-string - print(f'{"Name":<30} {"Type":<40} {"Size":<10}') - print('-' * 82) - for name in event.collections(): - coll = event.get(name) - print(f'{name:<30} {coll.getValueTypeName():<40} {len(coll):<10}') +def print_general_info(reader, filename): + """Print an overview of the file contents at the very beginning. -def dump_overview(store, events): - """Print an overview for all the desired events""" - for ievt in events: - event = store[ievt] - dump_evt_overview(event, ievt) + This prints things like the available categories (and how many entries they + have) as well as the filename, etc. - -def dump_evt_detailed(event, ievt): - """Dump this event in all its glory""" - print() - print('{:#^82}'.format(f' Event {ievt} ')) # pylint: disable=consider-using-f-string + Args: + reader (root_io.Reader, sio_io.Reader): An initialized reader + """ + print(f'input file: {filename}\n') + legacy_text = ' (this is a legacy file!)' if reader.is_legacy else '' + print(f'Frame categories in this file{legacy_text}:') + print(f'{"Name":<20} {"Entries":<10}') + print('-' * 31) + for category in reader.categories: + print(f'{category:<20} {len(reader.get(category)):<10}') print() - print('Parameters', flush=True) - event.metadata().print() - print(flush=True) - for name in event.collections(): - print(name, flush=True) - event.get(name).print() +def print_frame(frame, cat_name, ientry, detailed): + """Print a Frame overview. 
+ + Args: + frame (podio.Frame): The frame to print + cat_name (str): The category name + ientry (int): The entry number of this Frame + detailed (bool): Print just an overview or dump the whole contents + """ + print('{:#^82}'.format(f' {cat_name} {ientry} ')) # pylint: disable=consider-using-f-string + print('Collections:') + + if not detailed: + print(f'{"Name":<30} {"Type":<40} {"Size":<10}') + print('-' * 82) + + # Print collections + for name in frame.collections: + coll = frame.get(name) + if detailed: + print(name, flush=True) + coll.print() + print(flush=True) + else: + print(f'{name:<30} {coll.getValueTypeName():<40} {len(coll):<10}') + + # And then parameters + print('\nParameters:', flush=True) + if detailed: + frame.get_parameters().print() print(flush=True) + else: + print(f'{"Name":<30} {"Type":<12} {"Elements":<10}') + print('-' * 54) + for name in frame.parameters: + par_infos = frame.get_param_info(name) + for par_type, n_pars in par_infos.items(): + print(f'{name:<30} {par_type:<12} {n_pars:<10}') - -def dump_detailed(store, events): - """Dump the complete event contents for all desired events""" - for ievt in events: - event = store[ievt] - dump_evt_detailed(event, ievt) - print() + # Additional new line before the next entry + print('\n', flush=True) def main(args): """Main""" - store = EventStore([args.inputfile]) - if args.detailed: - dump_detailed(store, args.event) - else: - dump_overview(store, args.event) + reader = get_reader(args.inputfile) + + print_general_info(reader, args.inputfile) + if args.category not in reader.categories: + print(f'ERROR: Cannot print category \'{args.category}\' (not present in file)') + sys.exit(1) + + frames = reader.get(args.category) + for ient in args.entries: + print_frame(frames[ient], args.category, ient, args.detailed) -def parse_evt_range(evt_string): - """Parse which events to print""" +def parse_entry_range(ent_string): + """Parse which entries to print""" try: - return [int(evt_string)] + return 
[int(ent_string)] except ValueError: pass try: - return [int(i) for i in evt_string.split(',')] + return [int(i) for i in ent_string.split(',')] except ValueError: pass try: - first, last = [int(i) for i in evt_string.split(':')] + first, last = [int(i) for i in ent_string.split(':')] return list(range(first, last + 1)) except ValueError: pass - raise argparse.ArgumentTypeError(f'\'{evt_string}\' cannot be parsed into a list of events') + raise argparse.ArgumentTypeError(f'\'{ent_string}\' cannot be parsed into a list of entries') if __name__ == '__main__': @@ -80,11 +108,13 @@ if __name__ == '__main__': # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here parser = argparse.ArgumentParser(description='Dump contents of a podio file to stdout') parser.add_argument('inputfile', help='Name of the file to dump content from') - parser.add_argument('-e', '--event', - help='Which event(s) to print. A single number, comma separated list of numbers' - ' or "first:last" for an inclusive range of events. Defaults to the first event.', - type=parse_evt_range, default=[0]) - parser.add_argument('-d', '--detailed', help='Dump the full event contents not just the collection info', + parser.add_argument('-c', '--category', help='Which Frame category to dump', + default='events', type=str) + parser.add_argument('-e', '--entries', + help='Which entries to print. A single number, comma separated list of numbers' + ' or "first:last" for an inclusive range of entries. 
Defaults to the first entry.', + type=parse_entry_range, default=[0]) + parser.add_argument('-d', '--detailed', help='Dump the full contents not just the collection info', action='store_true', default=False) clargs = parser.parse_args() From c44c92cf6848339e4c1686a06d8165c570075913 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 16 Nov 2022 18:29:05 +0100 Subject: [PATCH 017/100] Fix a few smaller issues with Frame python bindings and podio-dump (#351) * Fix erroneously considering empty collections as not present * Handle missing SIO backend more gracefully in podio-dump --- python/podio/frame.py | 15 ++++++++++----- python/podio/reading.py | 29 +++++++++++++++++++---------- python/podio/test_Frame.py | 3 ++- tests/read_frame.h | 9 +++++++++ tests/write_frame.h | 11 ++++++++++- tools/podio-dump | 6 +++++- 6 files changed, 55 insertions(+), 18 deletions(-) diff --git a/python/podio/frame.py b/python/podio/frame.py index 0186d6801..7f4433959 100644 --- a/python/podio/frame.py +++ b/python/podio/frame.py @@ -2,7 +2,7 @@ """Module for the python bindings of the podio::Frame""" # pylint: disable-next=import-error # gbl is a dynamic module from cppyy -from cppyy.gbl import std +import cppyy import ROOT # NOTE: It is necessary that this can be found on the ROOT_INCLUDE_PATH @@ -22,10 +22,10 @@ def _determine_supported_parameter_types(lang): classes that are supported """ types_tuple = podio.SupportedGenericDataTypes() - n_types = std.tuple_size[podio.SupportedGenericDataTypes].value + n_types = cppyy.gbl.std.tuple_size[podio.SupportedGenericDataTypes].value # Get the python types with the help of cppyy and the STL - py_types = (type(std.get[i](types_tuple)).__name__ for i in range(n_types)) + py_types = (type(cppyy.gbl.std.get[i](types_tuple)).__name__ for i in range(n_types)) if lang == 'py': return tuple(py_types) if lang == 'c++': @@ -58,6 +58,11 @@ def _determine_supported_parameter_types(lang): class Frame: """Frame class that serves as a container of 
collection and meta data.""" + # cppyy implicitly converts empty collections to False in boolean contexts. To + # distinguish between empty and non-existant collection create a nullptr here + # with the correct type that we can compare against + _coll_nullptr = cppyy.bind_object(cppyy.nullptr, 'podio::CollectionBase') + def __init__(self, data=None): """Create a Frame. @@ -95,8 +100,8 @@ def get(self, name): KeyError: If the collection with the name is not available """ collection = self._frame.get(name) - if not collection: - raise KeyError + if collection == self._coll_nullptr: + raise KeyError(f"Collection '{name}' is not available") return collection @property diff --git a/python/podio/reading.py b/python/podio/reading.py index d9bfe1832..f853e1795 100644 --- a/python/podio/reading.py +++ b/python/podio/reading.py @@ -4,16 +4,21 @@ from ROOT import TFile from podio import root_io -from podio import sio_io +try: + from podio import sio_io + def _is_frame_sio_file(filename): + """Peek into the sio file to determine whether this is a legacy file or not.""" + with open(filename, 'rb') as sio_file: + first_line = str(sio_file.readline()) + # The SIO Frame writer writes a podio_header_info at the beginning of the + # file + return first_line.find('podio_header_info') > 0 -def _is_frame_sio_file(filename): - """Peek into the sio file to determine whether this is a legacy file or not.""" - with open(filename, 'rb') as sio_file: - first_line = str(sio_file.readline()) - # The SIO Frame writer writes a podio_header_info at the beginning of the - # file - return first_line.find('podio_header_info') > 0 +except ImportError: + def _is_frame_sio_file(filename): + """Stub raising a ValueError""" + raise ValueError('podio has not been built with SIO support, which is necessary to read this file') def _is_frame_root_file(filename): @@ -30,8 +35,12 @@ def get_reader(filename): filename (str): The input file Returns: - root_io.[Legacy]Reader, sio_io.[Legacy]Reader: an initialized 
reader - that is able to process the input file + root_io.[Legacy]Reader, sio_io.[Legacy]Reader: an initialized reader that + is able to process the input file. + + Raises: + ValueError: If the file cannot be recognized, or if podio has not been + built with the necessary backend I/O support """ if filename.endswith('.sio'): if _is_frame_sio_file(filename): diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index 1390e08ce..9e67077a5 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -12,7 +12,8 @@ 'arrays', 'WithVectorMember', 'info', 'fixedWidthInts', 'mcparticles', 'moreMCs', 'mcParticleRefs', 'hits', 'hitRefs', 'clusters', 'refs', 'refs2', 'OneRelation', 'userInts', 'userDoubles', 'WithNamespaceMember', - 'WithNamespaceRelation', 'WithNamespaceRelationCopy' + 'WithNamespaceRelation', 'WithNamespaceRelationCopy', + 'emptyCollection', 'emptySubsetColl' } # The expected parameter names in each frame EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData'} diff --git a/tests/read_frame.h b/tests/read_frame.h index 166bd3456..b48089cd1 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -35,6 +35,15 @@ int read_frames(const std::string& filename) { // sure that the writing/reading order does not impose any usage requirements for (size_t i = 0; i < reader.getEntries("events"); ++i) { auto frame = podio::Frame(reader.readNextEntry("events")); + if (frame.get("emptySubsetColl") == nullptr) { + std::cerr << "Could not retrieve an empty subset collection" << std::endl; + return 1; + } + if (frame.get("emptyCollection") == nullptr) { + std::cerr << "Could not retrieve an empty collection" << std::endl; + return 1; + } + processEvent(frame, i, reader.currentFileVersion()); auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); diff --git a/tests/write_frame.h b/tests/write_frame.h index e72ae6b50..b957fa63b 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -37,7 
+37,9 @@ static const std::vector collsToWrite = {"mcparticles", "userDoubles", "WithNamespaceMember", "WithNamespaceRelation", - "WithNamespaceRelationCopy"}; + "WithNamespaceRelationCopy", + "emptyCollection", + "emptySubsetColl"}; auto createMCCollection() { auto mcps = ExampleMCCollection(); @@ -368,6 +370,13 @@ podio::Frame makeFrame(int iFrame) { frame.putParameter("SomeVectorData", {1, 2, 3, 4}); frame.putParameter("SomeVectorData", {"just", "some", "strings"}); + // An empty collection + frame.put(ExampleClusterCollection(), "emptyCollection"); + // An empty subset collection + auto emptySubsetColl = ExampleHitCollection(); + emptySubsetColl.setSubsetCollection(); + frame.put(std::move(emptySubsetColl), "emptySubsetColl"); + return frame; } diff --git a/tools/podio-dump b/tools/podio-dump index 3fd3b366c..0834053c9 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -70,7 +70,11 @@ def print_frame(frame, cat_name, ientry, detailed): def main(args): """Main""" - reader = get_reader(args.inputfile) + try: + reader = get_reader(args.inputfile) + except ValueError as err: + print(f'ERROR: Cannot open file \'{args.inputfile}\': {err}') + sys.exit(1) print_general_info(reader, args.inputfile) if args.category not in reader.categories: From 34ca51e1fc0bcbbe9d594fa66edb8acf8484454f Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Fri, 2 Dec 2022 09:19:26 +0100 Subject: [PATCH 018/100] Make scripts POSIX compliant and fix mix of tabs and spaces (#354) * Add /include to ROOT_INCLUDE_PATH --- env.sh | 24 +++++++++++++----------- init.sh | 8 ++++---- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/env.sh b/env.sh index ee1f418af..8c50d6138 100644 --- a/env.sh +++ b/env.sh @@ -8,25 +8,27 @@ if [ -z "$PODIO" ]; then fi unamestr=`uname` -if [[ "$unamestr" == 'Linux' ]]; then - echo $LD_LIBRARY_PATH | grep $PODIO/lib >& /dev/null - if [ $? == "1" ]; then +if [[ "$unamestr" = 'Linux' ]]; then + if ! 
echo $LD_LIBRARY_PATH | grep $PODIO/lib > /dev/null 2>&1; then # RedHat based put the libraries into lib64 if [ -d $PODIO/lib64 ]; then export LD_LIBRARY_PATH=$PODIO/lib64:$LD_LIBRARY_PATH else export LD_LIBRARY_PATH=$PODIO/lib:$LD_LIBRARY_PATH fi - fi -elif [[ "$unamestr" == 'Darwin' ]]; then + fi +elif [[ "$unamestr" = 'Darwin' ]]; then # This currenty does not work on OS X as DYLD_LIBRARY_PATH is ignored # in recent OS X versions - echo $DYLD_LIBRARY_PATH | grep $PODIO/lib >& /dev/null - if [ $? == "1" ]; then - export DYLD_LIBRARY_PATH=$PODIO/lib:$DYLD_LIBRARY_PATH + if ! echo $DYLD_LIBRARY_PATH | grep -o $PODIO/lib > /dev/null 2>&1; then + export DYLD_LIBRARY_PATH=$PODIO/lib:$DYLD_LIBRARY_PATH fi fi -echo $PYTHONPATH | grep $PODIO/python >& /dev/null -if [ $? == "1" ]; then - export PYTHONPATH=$PODIO/python:$PYTHONPATH + +if ! echo $PYTHONPATH | grep -o $PODIO/python > /dev/null 2>&1; then + export PYTHONPATH=$PODIO/python:$PYTHONPATH +fi + +if ! echo $ROOT_INCLUDE_PATH | grep -o $PODIO/include > /dev/null 2>&1; then + export ROOT_INCLUDE_PATH=$PODIO/include:$ROOT_INCLUDE_PATH fi diff --git a/init.sh b/init.sh index 12e1d6539..8d2ffdfac 100644 --- a/init.sh +++ b/init.sh @@ -11,14 +11,14 @@ # First see if PODIO is already set if [ -n "$PODIO" -a "$1" != "-r" ]; then - echo "PODIO already set - use '-r' if you want to reinitialise it" - return + echo "PODIO already set - use '-r' if you want to reinitialise it" + return fi export PODIO=$(pwd)/install if [ -e env.sh ]; then - source ./env.sh + source ./env.sh else - echo "To complete PODIO setup please source the 'env.sh' script" + echo "To complete PODIO setup please source the 'env.sh' script" fi From 773412d3df8ce2cca37e5c8ec98b16d308cb553e Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 5 Dec 2022 19:11:13 +0100 Subject: [PATCH 019/100] Make sure /bin is on path after env.sh (#357) Necessary for podio-dump --- env.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/env.sh b/env.sh index 
8c50d6138..164c044aa 100644 --- a/env.sh +++ b/env.sh @@ -32,3 +32,7 @@ fi if ! echo $ROOT_INCLUDE_PATH | grep -o $PODIO/include > /dev/null 2>&1; then export ROOT_INCLUDE_PATH=$PODIO/include:$ROOT_INCLUDE_PATH fi + +if ! echo $PATH | grep $PODIO/bin > /dev/null 2>&1; then + export PATH=$PODIO/bin:$PATH +fi From c19e4279aa65f578f5f8b72ce4113280c1eac600 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 6 Dec 2022 12:00:19 +0100 Subject: [PATCH 020/100] Fix path in the README and simplify option parsing (#356) * Fix README with the new path * Use the choices from argparse for the handlers --- README.md | 5 +---- python/podio_class_generator.py | 15 ++------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index e3c56aa36..2215a0c64 100755 --- a/README.md +++ b/README.md @@ -151,10 +151,7 @@ The C++ in [tests/datamodel/](tests/datamodel/) has been fully generated by a co To run the code generation script, do - mkdir ../Tmp - mkdir ../Tmp/data - mkdir ../Tmp/src - python ../python/podio_class_generator.py ../examples/datalayout.yaml ../Tmp data ROOT + python ../python/podio_class_generator.py ../tests/datalayout.yaml ../Tmp data ROOT The generation script has the following additional options: diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 42187e8b0..f07d83c5e 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -468,17 +468,6 @@ def _sort_includes(self, includes): return package_includes + upstream_includes + podio_includes + stl_includes -def verify_io_handlers(handler): - """Briefly verify that all arguments passed as handlers are indeed valid""" - valid_handlers = ( - 'ROOT', - 'SIO', - ) - if handler in valid_handlers: - return handler - raise argparse.ArgumentTypeError(f'{handler} is not a valid io handler') - - def read_upstream_edm(name_path): """Read an upstream EDM yaml definition file to 
make the types that are defined in that available to the current EDM""" @@ -511,8 +500,8 @@ def read_upstream_edm(name_path): 'Header files will be put under //*.h. ' 'Source files will be put under /src/*.cc') parser.add_argument('packagename', help='Name of the package.') - parser.add_argument('iohandlers', help='The IO backend specific code that should be generated', - type=verify_io_handlers, nargs='+') + parser.add_argument('iohandlers', choices=['ROOT', 'SIO'], nargs='+', + help='The IO backend specific code that should be generated') parser.add_argument('-q', '--quiet', dest='verbose', action='store_false', default=True, help='Don\'t write a report to screen') parser.add_argument('-d', '--dryrun', action='store_true', default=False, From 1f8f560619053e5ba5ad44f128e7aa77ca123837 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Mon, 7 Nov 2022 15:57:36 +0100 Subject: [PATCH 021/100] move to Apache 2.0 license as discussed in #324 --- LICENSE | 674 -------------------------------------------------------- NOTICE | 15 +- 2 files changed, 12 insertions(+), 677 deletions(-) delete mode 100644 LICENSE diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 94a9ed024..000000000 --- a/LICENSE +++ /dev/null @@ -1,674 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. 
We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. 
The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. 
Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. 
A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. 
You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. 
- - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. 
- - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. 
This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. 
For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. 
Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. 
Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. 
- - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - Copyright (C) - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. 
diff --git a/NOTICE b/NOTICE index e0cbbf3bd..5c65beee6 100644 --- a/NOTICE +++ b/NOTICE @@ -1,4 +1,13 @@ -(c) Copyright 2015 Benedikt Hegner +Copyright [2015 - 2022] [PODIO developer team] -This software is distributed under the terms of the GNU General Public -Licence version 3 (GPL Version 3), copied verbatim in the file "LICENCE". + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this library except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. From d15a43aad8967b975f89463c36ab6b632a59ce3a Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 10 Nov 2022 16:28:41 +0100 Subject: [PATCH 022/100] Only install files that are present --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 486b02351..e5d09ffda 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,8 +150,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/python/__init__.py.in ${CMAKE_CURRENT_SOURCE_DIR}/python/podio/__init__.py) #--- add license files --------------------------------------------------------- -install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/LICENSE - ${CMAKE_CURRENT_SOURCE_DIR}/NOTICE +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/NOTICE DESTINATION ${CMAKE_INSTALL_DOCDIR}) #--- project specific subdirectories ------------------------------------------- From 57578f51f9ca614696e02db315ff21a8eed9f4a4 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 14 Dec 2022 11:56:10 +0100 Subject: [PATCH 023/100] Release Notes for v00-16-01 --- doc/ReleaseNotes.md | 64 
+++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index 594371561..aaca3f025 100644 --- a/doc/ReleaseNotes.md +++ b/doc/ReleaseNotes.md @@ -1,3 +1,67 @@ +# v00-16-01 + +* 2022-12-06 jmcarcell ([PR#356](https://github.com/AIDASoft/podio/pull/356)) + - Fix path in the README + - Use the functionality in argparse to choose between options + +* 2022-12-06 Benedikt Hegner ([PR#346](https://github.com/AIDASoft/podio/pull/346)) + - Switched to Apache 2.0 license to facilitate integration in experiment stacks. + +* 2022-12-05 Thomas Madlener ([PR#357](https://github.com/AIDASoft/podio/pull/357)) + - Put `/bin` onto `PATH` in order to make `podio-dump` available from environments created with `env.sh` + +* 2022-12-02 jmcarcell ([PR#354](https://github.com/AIDASoft/podio/pull/354)) + - Make `env.sh` setup script POSIX compliant to run in shells other than bash + - Change `==` to `=` + - Change tabs to spaces (two) to avoid mix of spaces and tabs for indenting + - Add `/include` to `ROOT_INCLUDE_PATH` (as it is required since #343) + +* 2022-11-16 Thomas Madlener ([PR#351](https://github.com/AIDASoft/podio/pull/351)) + - Fix bug in Frame python bindings where empty collections were considered as non-existing. Replaced the original check relying on some implicit boolean conversions (which also caught empty collections) with an explicit check against `nullptr`. + - Make `podio-dump` more robust in installations without SIO support, by guarding the corresponding import. + +* 2022-11-14 Thomas Madlener ([PR#344](https://github.com/AIDASoft/podio/pull/344)) + - Make `podio-dump` work with new Frame based I/O (fixes #339) + - Keep existing functionality intact by using the legacy readers introduced in #345. 
+ +* 2022-11-11 Thomas Madlener ([PR#345](https://github.com/AIDASoft/podio/pull/345)) + - Add a `ROOTLegacyReader` and a `SIOLegacyReader` that read files that have been written prior to #287 into `podio::Frame`s and offer the same interface as the frame readers + - Also include python bindings for them + +* 2022-11-10 Thomas Madlener ([PR#349](https://github.com/AIDASoft/podio/pull/349)) + - Fix bug in setting relations in nested get calls in `podio::Frame`. Fixes #348 + - Adapt the read test to actually check this. Previously this went unnoticed, because the necessary relations were already set in a previous call. + +* 2022-11-10 Thomas Madlener ([PR#343](https://github.com/AIDASoft/podio/pull/343)) + - Add python bindings for `Frame` based I/O + - Available from `podio.root_io` and `podio.sio_io`, where a `Reader` and a `Writer` are implemented for each. + - Wrapper around `podio::Frame`. **Requires that the `podio/Frame.h` header is available somewhere on the `ROOT_INCLUDE_PATH`**. + - Add necessary functionality for python bindings to C++ API + - untyped `Frame::get` method for getting collections + - New constructor from `FrameDataT&&` + - functionality to inspect file and `Frame` contents more easily + - Reorganize python code into structure that follows the usual python packaging conventions a bit more closely + - Introduce the `podio` module. Make CMake generate the `__init__.py` with the correct version + - Move everything except the generator script into `module`. Additionally also keep an `EventStore` wrapper to not break existing code. + - Refactor the `CMakeLists.txt` that is responsible for building the core and all required I/O libraries + - Build more dictionaries for more python bindings. 
+ +* 2022-11-02 Thomas Madlener ([PR#342](https://github.com/AIDASoft/podio/pull/342)) + - Migrate to `actions/checkout@v3` as advised by [github](https://github.blog/changelog/2022-09-22-github-actions-all-actions-will-begin-running-on-node16-instead-of-node12/) + - Use the checkout action to clone the dependencies in the edm4hep workflow instead of doing an explicit clone in the body of the action + +* 2022-11-02 Dmitry Kalinkin ([PR#327](https://github.com/AIDASoft/podio/pull/327)) + - fix typo in documentation + +* 2022-10-24 Juraj Smiesko ([PR#340](https://github.com/AIDASoft/podio/pull/340)) + - Adding reading of specific entry from frame + +* 2022-10-21 Thomas Madlener ([PR#335](https://github.com/AIDASoft/podio/pull/335)) + - Update the `github-action-cvmfs` and `run-lcg-view` actions to their latest available version to pick up the latest improvements (caching of dependencies, log groups) + - Introduce log groups in github actions for easier to interpret outputs + - Switch to LCG_102 for lcg based build environments + - Add a workflow that builds and tests EDM4hep after building podio + # v00-16 * 2022-10-04 Thomas Madlener ([PR#337](https://github.com/AIDASoft/podio/pull/337)) From 5f3ba080c862fecf9de8242b4031619774e8fcb8 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 14 Dec 2022 11:56:11 +0100 Subject: [PATCH 024/100] Updating version to v00-16-01 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5d09ffda..25063d7c8 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 0 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 1 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From 
0a3bb19ee95506b5b50e5d3037fc680a5ecacc49 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 16 Dec 2022 18:21:01 +0100 Subject: [PATCH 025/100] Initialize unique_ptr in initializer list for nvcc (#333) * Initialize unique_ptr in initializer list for nvcc * Skip tests for podio build in edm4hep workflows Co-authored-by: Valentin Volkl --- .github/workflows/edm4hep.yaml | 9 ++++----- .github/workflows/ubuntu.yml | 3 ++- cmake/podioBuild.cmake | 4 ++++ env.sh | 4 ++++ include/podio/CollectionIDTable.h | 11 ++++------- include/podio/GenericParameters.h | 14 +++++++------- python/templates/Collection.cc.jinja2 | 4 ++-- python/templates/Collection.h.jinja2 | 2 +- src/CollectionIDTable.cc | 11 +++++++++++ src/GenericParameters.cc | 6 ++++++ 10 files changed, 45 insertions(+), 23 deletions(-) diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml index 06149dc90..3ee886439 100644 --- a/.github/workflows/edm4hep.yaml +++ b/.github/workflows/edm4hep.yaml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: build-and-test: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -43,16 +43,15 @@ jobs: -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ -DUSE_EXTERNAL_CATCH2=ON \ + -DBUILD_TESTING=OFF\ -G Ninja .. 
ninja -k0 echo "::endgroup::" echo "::group::Test and install podio" ctest --output-on-failure ninja install - export ROOT_INCLUDE_PATH=$STARTDIR/podio/install/include:$ROOT_INCLUDE_PATH:$CPATH - unset CPATH - export CMAKE_PREFIX_PATH=$STARTDIR/podio/install:$CMAKE_PREFIX_PATH - export LD_LIBRARY_PATH=$STARTDIR/podio/install/lib:$STARTDIR/podio/install/lib64:$LD_LIBRARY_PATH + cd $STARTDIR/podio + source init.sh && source env.sh echo "::endgroup::" echo "::group::Build and test EDM4hep" cd $STARTDIR/edm4hep diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 654b14a37..d7521bc53 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -26,6 +26,7 @@ jobs: -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ -DUSE_EXTERNAL_CATCH2=OFF \ + -DPODIO_SET_RPATH=ON \ -G Ninja .. echo "::endgroup::" echo "::group::Build" diff --git a/cmake/podioBuild.cmake b/cmake/podioBuild.cmake index 605f0de72..acbf2a5e1 100644 --- a/cmake/podioBuild.cmake +++ b/cmake/podioBuild.cmake @@ -27,6 +27,10 @@ macro(podio_set_rpath) if("${isSystemDir}" STREQUAL "-1") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${LIBDIR}") endif("${isSystemDir}" STREQUAL "-1") + # Make sure to actualy set RPATH and not RUNPATH by disabling the new dtags + # explicitly. Set executable and shared library linker flags for this + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--disable-new-dtags") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--disable-new-dtags") else() set(CMAKE_SKIP_INSTALL_RPATH TRUE) # skip the full RPATH for the install tree endif() diff --git a/env.sh b/env.sh index 164c044aa..d5eca5323 100644 --- a/env.sh +++ b/env.sh @@ -36,3 +36,7 @@ fi if ! echo $PATH | grep $PODIO/bin > /dev/null 2>&1; then export PATH=$PODIO/bin:$PATH fi + +if ! 
echo $CMAKE_PREFIX_PATH | grep $PODIO > /dev/null 2>&1; then + export CMAKE_PREFIX_PATH=$PODIO:$CMAKE_PREFIX_PATH +fi diff --git a/include/podio/CollectionIDTable.h b/include/podio/CollectionIDTable.h index c639a904c..47d51251b 100644 --- a/include/podio/CollectionIDTable.h +++ b/include/podio/CollectionIDTable.h @@ -11,8 +11,7 @@ namespace podio { class CollectionIDTable { public: - /// default constructor - CollectionIDTable() = default; + CollectionIDTable(); ~CollectionIDTable() = default; CollectionIDTable(const CollectionIDTable&) = delete; @@ -21,11 +20,9 @@ class CollectionIDTable { CollectionIDTable& operator=(CollectionIDTable&&) = default; /// constructor from existing ID:name mapping - CollectionIDTable(std::vector&& ids, std::vector&& names) : - m_collectionIDs(std::move(ids)), m_names(std::move(names)){}; + CollectionIDTable(std::vector&& ids, std::vector&& names); - CollectionIDTable(const std::vector& ids, const std::vector& names) : - m_collectionIDs(ids), m_names(names){}; + CollectionIDTable(const std::vector& ids, const std::vector& names); /// return collection ID for given name int collectionID(const std::string& name) const; @@ -61,7 +58,7 @@ class CollectionIDTable { private: std::vector m_collectionIDs{}; std::vector m_names{}; - mutable std::unique_ptr m_mutex{std::make_unique()}; + mutable std::unique_ptr m_mutex{nullptr}; }; } // namespace podio diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 5da2c22ef..b2fd37b16 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -84,7 +84,7 @@ class GenericParameters { using MutexPtr = std::unique_ptr; public: - GenericParameters() = default; + GenericParameters(); /// GenericParameters are copyable /// NOTE: This is currently mainly done to keep the ROOT I/O happy, because @@ -271,12 +271,12 @@ class GenericParameters { } private: - IntMap _intMap{}; ///< The map storing the integer values - mutable MutexPtr 
m_intMtx{std::make_unique()}; ///< The mutex guarding the integer map - FloatMap _floatMap{}; ///< The map storing the float values - mutable MutexPtr m_floatMtx{std::make_unique()}; ///< The mutex guarding the float map - StringMap _stringMap{}; ///< The map storing the double values - mutable MutexPtr m_stringMtx{std::make_unique()}; ///< The mutex guarding the float map + IntMap _intMap{}; ///< The map storing the integer values + mutable MutexPtr m_intMtx{nullptr}; ///< The mutex guarding the integer map + FloatMap _floatMap{}; ///< The map storing the float values + mutable MutexPtr m_floatMtx{nullptr}; ///< The mutex guarding the float map + StringMap _stringMap{}; ///< The map storing the double values + mutable MutexPtr m_stringMtx{nullptr}; ///< The mutex guarding the float map }; template diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 06069621b..c265c29a7 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -17,10 +17,10 @@ {% with collection_type = class.bare_type + 'Collection' %} {{ collection_type }}::{{ collection_type }}() : - m_isValid(false), m_isPrepared(false), m_isSubsetColl(false), m_collectionID(0), m_storage() {} + m_isValid(false), m_isPrepared(false), m_isSubsetColl(false), m_collectionID(0), m_storageMtx(std::make_unique()), m_storage() {} {{ collection_type }}::{{ collection_type }}({{ collection_type }}Data&& data, bool isSubsetColl) : - m_isValid(false), m_isPrepared(false), m_isSubsetColl(isSubsetColl), m_collectionID(0), m_storage(std::move(data)) {} + m_isValid(false), m_isPrepared(false), m_isSubsetColl(isSubsetColl), m_collectionID(0), m_storageMtx(std::make_unique()), m_storage(std::move(data)) {} {{ collection_type }}::~{{ collection_type }}() { // Need to tell the storage how to clean-up diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 039fe0d6a..049ecff79 100644 --- 
a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -158,7 +158,7 @@ private: mutable bool m_isPrepared{false}; bool m_isSubsetColl{false}; int m_collectionID{0}; - mutable std::unique_ptr m_storageMtx{std::make_unique()}; + mutable std::unique_ptr m_storageMtx{nullptr}; mutable {{ class.bare_type }}CollectionData m_storage{}; }; diff --git a/src/CollectionIDTable.cc b/src/CollectionIDTable.cc index f999af077..e5ca3d15d 100644 --- a/src/CollectionIDTable.cc +++ b/src/CollectionIDTable.cc @@ -5,6 +5,17 @@ namespace podio { +CollectionIDTable::CollectionIDTable() : m_mutex(std::make_unique()) { +} + +CollectionIDTable::CollectionIDTable(std::vector&& ids, std::vector&& names) : + m_collectionIDs(std::move(ids)), m_names(std::move(names)), m_mutex(std::make_unique()) { +} + +CollectionIDTable::CollectionIDTable(const std::vector& ids, const std::vector& names) : + m_collectionIDs(ids), m_names(names), m_mutex(std::make_unique()) { +} + const std::string CollectionIDTable::name(int ID) const { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_collectionIDs), end(m_collectionIDs), ID); diff --git a/src/GenericParameters.cc b/src/GenericParameters.cc index 4cf48d550..c1597d16f 100644 --- a/src/GenericParameters.cc +++ b/src/GenericParameters.cc @@ -5,6 +5,12 @@ namespace podio { +GenericParameters::GenericParameters() : + m_intMtx(std::make_unique()), + m_floatMtx(std::make_unique()), + m_stringMtx(std::make_unique()) { +} + GenericParameters::GenericParameters(const GenericParameters& other) : m_intMtx(std::make_unique()), m_floatMtx(std::make_unique()), From 579e69cb7b0a3051f79e4a2e119b40d0399553b6 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 19 Dec 2022 12:15:27 +0100 Subject: [PATCH 026/100] Improve the output of the SIOBlocks lib loading (#360) --- include/podio/SIOBlock.h | 12 +++++++++--- src/SIOBlock.cc | 33 +++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 15 deletions(-) diff 
--git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 7c9b2a462..123f8ef6a 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace podio { @@ -185,15 +186,20 @@ class SIOBlockFactory { class SIOBlockLibraryLoader { private: SIOBlockLibraryLoader(); + + /// Status code for loading shared SIOBlocks libraries + enum class LoadStatus : short { Success = 0, AlreadyLoaded = 1, Error = 2 }; + /** * Load a library with the given name via dlopen */ - void loadLib(const std::string& libname); + LoadStatus loadLib(const std::string& libname); + /** * Get all files that are found on LD_LIBRARY_PATH and that have "SioBlocks" - * in their name + * in their name together with the directory they are in */ - static std::vector getLibNames(); + static std::vector> getLibNames(); std::map _loadedLibs{}; diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 55fa62871..97877adc0 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -140,33 +140,42 @@ std::shared_ptr SIOBlockFactory::createBlock(const podio::CollectionBa } SIOBlockLibraryLoader::SIOBlockLibraryLoader() { - for (const auto& lib : getLibNames()) { - loadLib(lib); + for (const auto& [lib, dir] : getLibNames()) { + const auto status = loadLib(lib); + switch (status) { + case LoadStatus::Success: + std::cout << "Loaded SIOBlocks library \'" << lib << "\' (from " << dir << ")" << std::endl; + break; + case LoadStatus::AlreadyLoaded: + std::cerr << "SIOBlocks library \'" << lib << "\' already loaded. 
Not loading again from " << dir << std::endl; + break; + case LoadStatus::Error: + std::cerr << "ERROR while loading SIOBlocks library \'" << lib << "\' (from " << dir << ")" << std::endl; + break; + } } } -void SIOBlockLibraryLoader::loadLib(const std::string& libname) { +SIOBlockLibraryLoader::LoadStatus SIOBlockLibraryLoader::loadLib(const std::string& libname) { if (_loadedLibs.find(libname) != _loadedLibs.end()) { - std::cerr << "SIOBlocks library \'" << libname << "\' already loaded. Not loading it again" << std::endl; - return; + return LoadStatus::AlreadyLoaded; } - void* libhandle = dlopen(libname.c_str(), RTLD_LAZY | RTLD_GLOBAL); if (libhandle) { - std::cout << "Loading SIOBlocks library \'" << libname << "\'" << std::endl; _loadedLibs.insert({libname, libhandle}); - } else { - std::cerr << "ERROR while loading SIOBlocks library \'" << libname << "\'" << std::endl; + return LoadStatus::Success; } + + return LoadStatus::Error; } -std::vector SIOBlockLibraryLoader::getLibNames() { +std::vector> SIOBlockLibraryLoader::getLibNames() { #ifdef USE_BOOST_FILESYSTEM namespace fs = boost::filesystem; #else namespace fs = std::filesystem; #endif - std::vector libs; + std::vector> libs; std::string dir; const auto ldLibPath = std::getenv("LD_LIBRARY_PATH"); @@ -182,7 +191,7 @@ std::vector SIOBlockLibraryLoader::getLibNames() { for (auto& lib : fs::directory_iterator(dir)) { const auto filename = lib.path().filename().string(); if (filename.find("SioBlocks") != std::string::npos) { - libs.emplace_back(std::move(filename)); + libs.emplace_back(std::move(filename), dir); } } } From b8ff7c13c9508e5a3c9d5e164920a9eab3dd9e87 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 19 Dec 2022 13:50:18 +0100 Subject: [PATCH 027/100] Release Notes for v00-16-02 --- doc/ReleaseNotes.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index aaca3f025..f588fc895 100644 --- a/doc/ReleaseNotes.md +++ b/doc/ReleaseNotes.md 
@@ -1,3 +1,14 @@ +# v00-16-02 + +* 2022-12-19 Thomas Madlener ([PR#360](https://github.com/AIDASoft/podio/pull/360)) + - Make the log output of loading the SIOBlock libraries more informative by also providing the absolute paths to the loaded (and rejected) shared libraries. + +* 2022-12-16 Thomas Madlener ([PR#333](https://github.com/AIDASoft/podio/pull/333)) + - Initialize the `unique_ptr` in the constructor initializer list instead of in the member variable declaration. This is more likely a bug in nvcc (or maybe a c++17 feature not yet supported by nvcc). Fixes key4hep/k4Clue#34 + - Pass `--disable-new-dtags` to the linker when using `PODIO_SET_RPATH`, to set `RPATH` and not `RUNPATH` in the binaries. + - Pin the ubuntu version for runners that build on ubuntu to not accidentally go out of sync with the underlying LCG releases. + - Disable the podio tests in the edm4hep workflows (see #359). + # v00-16-01 * 2022-12-06 jmcarcell ([PR#356](https://github.com/AIDASoft/podio/pull/356)) From f2ad9f03573aaee6f3b75874509f464461007294 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 19 Dec 2022 13:50:19 +0100 Subject: [PATCH 028/100] Updating version to v00-16-02 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 25063d7c8..b83e56ce1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 1 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 2 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From 7f4d2ee9dd3313f3a8364ea7bf3cdc9210b94713 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Fri, 23 Dec 2022 09:34:45 +0100 Subject: [PATCH 029/100] Rename 
match -> result for python > 3.10 (#362) --- python/podio/podio_config_reader.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index 14c34d0f5..a72bcce06 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -53,34 +53,34 @@ def _parse_with_regexps(string, regexps_callbacks): """Parse the string using the passed regexps and corresponding callbacks that take the match and return a MemberVariable from it""" for rgx, callback in regexps_callbacks: - match = rgx.match(string) - if match is not None: - return callback(match) + result = rgx.match(string) + if result: + return callback(result) raise DefinitionError(f"'{string}' is not a valid member definition") @staticmethod - def _full_array_conv(match): + def _full_array_conv(result): """MemberVariable construction for array members with a docstring""" - typ, size, name, def_val, comment = match.groups() + typ, size, name, def_val, comment = result.groups() return MemberVariable(name=name, array_type=typ, array_size=size, description=comment.strip(), default_val=def_val) @staticmethod - def _full_member_conv(match): + def _full_member_conv(result): """MemberVariable construction for members with a docstring""" - klass, name, def_val, comment = match.groups() + klass, name, def_val, comment = result.groups() return MemberVariable(name=name, type=klass, description=comment.strip(), default_val=def_val) @staticmethod - def _bare_array_conv(match): + def _bare_array_conv(result): """MemberVariable construction for array members without docstring""" - typ, size, name, def_val = match.groups() + typ, size, name, def_val = result.groups() return MemberVariable(name=name, array_type=typ, array_size=size, default_val=def_val) @staticmethod - def _bare_member_conv(match): + def _bare_member_conv(result): """MemberVarible construction for members without docstring""" - 
klass, name, def_val = match.groups() + klass, name, def_val = result.groups() return MemberVariable(name=name, type=klass, default_val=def_val) def parse(self, string, require_description=True): From edc51e7ec22c084fbf6ba08a98fc4364516f490a Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 10 Jan 2023 13:35:10 +0100 Subject: [PATCH 030/100] Fix the pre-commit workflow (#365) --- .github/workflows/pre-commit.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 5f8eaf5fe..187432d38 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,8 +15,9 @@ jobs: echo "::group::Setup pre-commit" export PYTHONPATH=$(python -m site --user-site):$PYTHONPATH export PATH=/root/.local/bin:$PATH - pip install argparse --user - pip install pre-commit --user + pip install pre-commit + # Use virtualenv from the LCG release + pip uninstall --yes virtualenv echo "::endgroup::" echo "::group::Run CMake" mkdir build From 0c3c02cc72347faeb5e60e937312d46b5a1e8e96 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Wed, 11 Jan 2023 17:44:54 +0100 Subject: [PATCH 031/100] Deal with missing files in the readers (#355) --- src/ROOTFrameReader.cc | 8 +++++++- src/ROOTLegacyReader.cc | 6 +++++- src/ROOTReader.cc | 8 +++++++- src/SIOFrameReader.cc | 2 +- src/SIOLegacyReader.cc | 2 +- src/SIOReader.cc | 3 +++ tests/CMakeLists.txt | 6 +++++- tests/read.cpp | 8 +++++++- tests/read_frame.h | 7 ++++++- tests/read_frame_legacy_root.cpp | 7 ++++++- tests/read_frame_legacy_sio.cpp | 7 ++++++- tests/read_sio.cpp | 8 +++++++- tests/unittest.cpp | 32 ++++++++++++++++++++++++++++++++ 13 files changed, 93 insertions(+), 11 deletions(-) diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index 93ac7d9e8..e3d6c6aba 100644 --- a/src/ROOTFrameReader.cc +++ 
b/src/ROOTFrameReader.cc @@ -12,6 +12,7 @@ #include "TTree.h" #include "TTreeCache.h" +#include #include namespace podio { @@ -201,7 +202,12 @@ void ROOTFrameReader::openFiles(const std::vector& filenames) { // NOTE: We simply assume that the meta data doesn't change throughout the // chain! This essentially boils down to the assumption that all files that // are read this way were written with the same settings. - m_metaChain->Add(filenames[0].c_str()); + // Every file is added to the chain to check that all files exist + for (const auto& filename : filenames) { + if (!m_metaChain->Add(filename.c_str(), -1)) { + throw std::runtime_error("File " + filename + " couldn't be found"); + } + } podio::version::Version* versionPtr{nullptr}; if (auto* versionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::versionBranchName)) { diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index ac718d14a..f25c1b6bc 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -135,7 +135,11 @@ void ROOTLegacyReader::openFile(const std::string& filename) { void ROOTLegacyReader::openFiles(const std::vector& filenames) { m_chain = std::make_unique("events"); for (const auto& filename : filenames) { - m_chain->Add(filename.c_str()); + //-1 forces the headers to be read so that + // the validity of the files can be checked + if (!m_chain->Add(filename.c_str(), -1)) { + throw std::runtime_error("File " + filename + " couldn't be found"); + } } // read the meta data and build the collectionBranches cache diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index c34cef9c0..b2cb33100 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -13,6 +13,7 @@ #include "TTree.h" #include "TTreeCache.h" #include +#include namespace podio { // todo: see https://github.com/AIDASoft/podio/issues/290 @@ -143,7 +144,12 @@ void ROOTReader::openFiles(const std::vector& filenames) { m_chain = new TChain("events"); for (const
auto& filename : filenames) { - m_chain->Add(filename.c_str()); + //-1 forces the headers to be read so that + // the validity of the files can be checked + if (!m_chain->Add(filename.c_str(), -1)) { + delete m_chain; + throw std::runtime_error("File " + filename + " couldn't be found"); + } } // read the meta data and build the collectionBranches cache diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc index d987d6a74..47f5ec082 100644 --- a/src/SIOFrameReader.cc +++ b/src/SIOFrameReader.cc @@ -17,7 +17,7 @@ SIOFrameReader::SIOFrameReader() { void SIOFrameReader::openFile(const std::string& filename) { m_stream.open(filename, std::ios::binary); if (!m_stream.is_open()) { - SIO_THROW(sio::error_code::not_open, "Cannot open input file '" + filename + "' for reading"); + throw std::runtime_error("File " + filename + " couldn't be opened"); } // NOTE: reading TOC record first because that jumps back to the start of the file! diff --git a/src/SIOLegacyReader.cc b/src/SIOLegacyReader.cc index d5329ca5d..f1bd23af4 100644 --- a/src/SIOLegacyReader.cc +++ b/src/SIOLegacyReader.cc @@ -16,7 +16,7 @@ SIOLegacyReader::SIOLegacyReader() { void SIOLegacyReader::openFile(const std::string& filename) { m_stream.open(filename, std::ios::binary); if (!m_stream.is_open()) { - SIO_THROW(sio::error_code::not_open, "Cannot open input file '" + filename + "' for reading"); + throw std::runtime_error("File " + filename + " couldn't be opened"); } // NOTE: reading TOC record first because that jumps back to the start of the file! 
diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 65430a3a2..69c398e10 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -61,6 +61,9 @@ podio::GenericParameters* SIOReader::readEventMetaData() { void SIOReader::openFile(const std::string& filename) { m_stream.open(filename, std::ios::binary); + if (!this->isValid()) { + throw std::runtime_error("File " + filename + " couldn't be found"); + } readCollectionIDTable(); if (!readFileTOCRecord()) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f32ce7b1b..335b5b433 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -186,7 +186,11 @@ configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake) find_package(Threads REQUIRED) add_executable(unittest unittest.cpp frame.cpp) -target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads) +target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) +if (ENABLE_SIO) + target_link_libraries(unittest PRIVATE podio::podioSioIO) + target_compile_definitions(unittest PRIVATE PODIO_WITH_SIO) +endif() # The unittests are a bit better and they are labelled so we can put together a # list of labels that we want to ignore diff --git a/tests/read.cpp b/tests/read.cpp index 066b817d6..d21ad81b0 100644 --- a/tests/read.cpp +++ b/tests/read.cpp @@ -3,7 +3,13 @@ int main() { auto reader = podio::ROOTReader(); - reader.openFile("example.root"); + try { + reader.openFile("example.root"); + } catch (const std::runtime_error& e) { + std::cout << "File could not be opened, aborting." 
<< std::endl; + return 1; + } + if (reader.currentFileVersion() != podio::version::build_version) { return 1; } diff --git a/tests/read_frame.h b/tests/read_frame.h index b48089cd1..28d382192 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -10,7 +10,12 @@ template int read_frames(const std::string& filename) { auto reader = ReaderT(); - reader.openFile(filename); + try { + reader.openFile(filename); + } catch (const std::runtime_error& e) { + std::cout << "File could not be opened, aborting." << std::endl; + return 1; + } if (reader.currentFileVersion() != podio::version::build_version) { std::cerr << "The podio build version could not be read back correctly. " diff --git a/tests/read_frame_legacy_root.cpp b/tests/read_frame_legacy_root.cpp index 9dfdfcc7a..f1038eabc 100644 --- a/tests/read_frame_legacy_root.cpp +++ b/tests/read_frame_legacy_root.cpp @@ -7,7 +7,12 @@ int main() { auto reader = podio::ROOTLegacyReader(); - reader.openFile("example.root"); + try { + reader.openFile("example.root"); + } catch (const std::runtime_error& e) { + std::cout << "File could not be opened, aborting." << std::endl; + return 1; + } if (reader.currentFileVersion() != podio::version::build_version) { std::cerr << "The podio build version could not be read back correctly. " diff --git a/tests/read_frame_legacy_sio.cpp b/tests/read_frame_legacy_sio.cpp index a29a0bb18..b8fbf009f 100644 --- a/tests/read_frame_legacy_sio.cpp +++ b/tests/read_frame_legacy_sio.cpp @@ -7,7 +7,12 @@ int main() { auto reader = podio::SIOLegacyReader(); - reader.openFile("example.sio"); + try { + reader.openFile("example.sio"); + } catch (const std::runtime_error& e) { + std::cout << "File could not be opened, aborting." << std::endl; + return 1; + } if (reader.currentFileVersion() != podio::version::build_version) { std::cerr << "The podio build version could not be read back correctly. 
" diff --git a/tests/read_sio.cpp b/tests/read_sio.cpp index d1fd8c8c9..85ef72332 100644 --- a/tests/read_sio.cpp +++ b/tests/read_sio.cpp @@ -4,7 +4,13 @@ int main() { // auto reader = podio::SIOReader(); podio::SIOReader reader; // SIOReader has no copy c'tor ... - reader.openFile("example.sio"); + try { + reader.openFile("example.sio"); + } catch (const std::runtime_error& e) { + std::cout << "File could not be opened, aborting." << std::endl; + return 1; + } + if (reader.currentFileVersion() != podio::version::build_version) { return 1; } diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 17b39f06b..218c58b17 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -11,7 +11,15 @@ // podio specific includes #include "podio/EventStore.h" #include "podio/GenericParameters.h" +#include "podio/ROOTFrameReader.h" +#include "podio/ROOTLegacyReader.h" +#include "podio/ROOTReader.h" #include "podio/podioVersion.h" +#ifdef PODIO_WITH_SIO + #include "podio/SIOFrameReader.h" + #include "podio/SIOLegacyReader.h" + #include "podio/SIOReader.h" +#endif // Test data types #include "datamodel/EventInfoCollection.h" @@ -979,6 +987,30 @@ TEST_CASE("GenericParameters return types", "[generic-parameters][static-checks] // strings } +TEST_CASE("Missing files (ROOT readers)", "[basics]") { + auto root_reader = podio::ROOTReader(); + REQUIRE_THROWS_AS(root_reader.openFile("NonExistentFile.root"), std::runtime_error); + + auto root_legacy_reader = podio::ROOTLegacyReader(); + REQUIRE_THROWS_AS(root_legacy_reader.openFile("NonExistentFile.root"), std::runtime_error); + + auto root_frame_reader = podio::ROOTFrameReader(); + REQUIRE_THROWS_AS(root_frame_reader.openFile("NonExistentFile.root"), std::runtime_error); +} + +#ifdef PODIO_WITH_SIO +TEST_CASE("Missing files (SIO readers)", "[basics]") { + auto sio_reader = podio::SIOReader(); + REQUIRE_THROWS_AS(sio_reader.openFile("NonExistentFile.sio"), std::runtime_error); + + auto sio_legacy_reader = podio::SIOLegacyReader(); + 
REQUIRE_THROWS_AS(sio_legacy_reader.openFile("NonExistentFile.sio"), std::runtime_error); + + auto sio_frame_reader = podio::SIOFrameReader(); + REQUIRE_THROWS_AS(sio_frame_reader.openFile("NonExistentFile.root"), std::runtime_error); +} +#endif + #ifdef PODIO_JSON_OUTPUT #include "nlohmann/json.hpp" From f6535cbff04b958a65589275931f3d23527c8ac2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 16 Jan 2023 17:16:47 +0100 Subject: [PATCH 032/100] Add basic I/O tests for datamodel extensions (#361) * Add basic I/O tests for datamodel extensions * Generate and build extension model earlier * Add extension datamodel to gitignore Co-authored-by: Valentin Volkl --- .gitignore | 1 + python/podio/test_Frame.py | 5 ++- tests/CMakeLists.txt | 34 +++++++++---------- tests/datalayout_extension.yaml | 2 +- tests/read_frame.h | 58 +++++++++++++++++++++++++++++++++ tests/write_frame.h | 55 +++++++++++++++++++++++++++++++ 6 files changed, 136 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 568832339..a838c3f20 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ build install tests/src tests/datamodel +tests/extension_model # Python *pyc diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index 9e67077a5..31e510ba0 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -15,6 +15,9 @@ 'WithNamespaceRelation', 'WithNamespaceRelationCopy', 'emptyCollection', 'emptySubsetColl' } +# The expected collections from the extension (only present in the other_events category) +EXPECTED_EXTENSION_COLL_NAMES = {"extension_Contained", "extension_ExternalComponent", "extension_ExternalRelation"} + # The expected parameter names in each frame EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData'} @@ -50,7 +53,7 @@ def setUp(self): def test_frame_collections(self): """Check that all expected collections are available.""" self.assertEqual(set(self.event.collections), EXPECTED_COLL_NAMES) - 
self.assertEqual(set(self.other_event.collections), EXPECTED_COLL_NAMES) + self.assertEqual(set(self.other_event.collections), EXPECTED_COLL_NAMES.union(EXPECTED_EXTENSION_COLL_NAMES)) # Not going over all collections here, as that should all be covered by the # c++ test cases; Simply picking a few and doing some basic tests diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 335b5b433..ad24cbf31 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -20,13 +20,28 @@ endif() PODIO_ADD_ROOT_IO_DICT(TestDataModelDict TestDataModel "${headers}" src/selection.xml) PODIO_ADD_SIO_IO_BLOCKS(TestDataModel "${headers}" "${sources}") +# Build the extension data model and link it against the upstream model +PODIO_GENERATE_DATAMODEL(extension_model datalayout_extension.yaml ext_headers ext_sources + UPSTREAM_EDM datamodel:datalayout.yaml + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} + OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) + +PODIO_ADD_DATAMODEL_CORE_LIB(ExtensionDataModel "${ext_headers}" "${ext_sources}" + OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) +target_link_libraries(ExtensionDataModel PUBLIC TestDataModel) + +PODIO_ADD_ROOT_IO_DICT(ExtensionDataModelDict ExtensionDataModel "${ext_headers}" ${CMAKE_CURRENT_SOURCE_DIR}/extension_model/src/selection.xml + OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) + +PODIO_ADD_SIO_IO_BLOCKS(ExtensionDataModel "${ext_headers}" "${ext_sources}") + #--- small utility helper function to allow for a more terse definition of tests below function(CREATE_PODIO_TEST sourcefile additional_libs) string( REPLACE ".cpp" "" name ${sourcefile} ) add_executable( ${name} ${sourcefile} ) add_test(NAME ${name} COMMAND ${name}) - target_link_libraries(${name} TestDataModel ${additional_libs}) + target_link_libraries(${name} TestDataModel ExtensionDataModel ${additional_libs}) set_property(TEST ${name} PROPERTY ENVIRONMENT 
LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts @@ -47,7 +62,7 @@ set(root_dependent_tests read_frame.cpp write_frame_root.cpp read_frame_legacy_root.cpp) -set(root_libs TestDataModelDict podio::podioRootIO) +set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") endforeach() @@ -230,18 +245,3 @@ else() LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} ) endif() - -# Build the extension data model and link it against the upstream model -PODIO_GENERATE_DATAMODEL(extension_model datalayout_extension.yaml ext_headers ext_sources - UPSTREAM_EDM datamodel:datalayout.yaml - IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} - OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) - -PODIO_ADD_DATAMODEL_CORE_LIB(ExtensionDataModel "${ext_headers}" "${ext_sources}" - OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) -target_link_libraries(ExtensionDataModel PUBLIC TestDataModel) - -PODIO_ADD_ROOT_IO_DICT(ExtensionDataModelDict ExtensionDataModel "${ext_headers}" ${CMAKE_CURRENT_SOURCE_DIR}/extension_model/src/selection.xml - OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}/extension_model) - -PODIO_ADD_SIO_IO_BLOCKS(ExtensionDataModel "${ext_headers}" "${ext_sources}") diff --git a/tests/datalayout_extension.yaml b/tests/datalayout_extension.yaml index 6a450f187..9a5c7cd03 100644 --- a/tests/datalayout_extension.yaml +++ b/tests/datalayout_extension.yaml @@ -17,7 +17,7 @@ components: - ex2::NamespaceStruct nspStruct datatypes: - extenstion::ContainedType: + extension::ContainedType: Author: "T. 
Madlener" Description: "A datatype in the extension that is self-contained" Members: diff --git a/tests/read_frame.h b/tests/read_frame.h index 28d382192..76d4f0057 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -3,10 +3,63 @@ #include "read_test.h" +#include "extension_model/ContainedTypeCollection.h" +#include "extension_model/ExternalComponentTypeCollection.h" +#include "extension_model/ExternalRelationTypeCollection.h" + #include "podio/Frame.h" #include +#define ASSERT(condition, msg) \ + if (!(condition)) { \ + throw std::runtime_error(msg); \ + } + +void processExtensions(const podio::Frame& event, int iEvent, podio::version::Version) { + const auto& extColl = event.get("extension_Contained"); + ASSERT(extColl.isValid(), "extension_Contained collection should be present"); + ASSERT(extColl.size() == 1, "extension_Contained collection should have one element"); + auto extElem = extColl[0]; + const auto& polVec = extElem.getAVector(); + ASSERT(polVec.r == iEvent * 123.f, "polVec.r value not as expected"); + ASSERT(polVec.phi == 0.15f, "polVec.phi value not as expected"); + ASSERT(polVec.rho == 3.14f, "polVec.rho value not as expected"); + + const auto& extCompColl = event.get("extension_ExternalComponent"); + ASSERT(extCompColl.isValid(), "extension_ExternalComponent collection should be present"); + ASSERT(extCompColl.size() == 1, "extension_ExternalComponent should have one element"); + auto extCompElem = extCompColl[0]; + ASSERT((extCompElem.getAStruct().p == std::array{iEvent, iEvent - 2, iEvent + 4, iEvent * 8}), + "aStruct.p value not as expected"); + ASSERT(extCompElem.getAComponent().aStruct.data.x == 42 * iEvent, "aComponent.aStruct.x value not as expected"); + ASSERT(extCompElem.getAComponent().nspStruct.x == iEvent, "aComponent.nspStruct.x value not as expected"); + ASSERT(extCompElem.getAComponent().nspStruct.y == iEvent * 2, "aComponent.nspStruct.y value not as expected"); + + const auto& extRelColl =
event.get("extension_ExternalRelation"); + ASSERT(extRelColl.isValid(), "extension_ExternalRelation collection should be present"); + ASSERT(extRelColl.size() == 3, "extension_ExternalRelation collection should contain 3 elements"); + + const auto& hits = event.get("hits"); + auto elem0 = extRelColl[0]; + ASSERT(elem0.getWeight() == iEvent * 100.f, "weight of first element not as expected"); + ASSERT(elem0.getSingleHit() == hits[0], "single hit relation not as expected"); + + const auto& clusters = event.get("clusters"); + auto elem1 = extRelColl[1]; + const auto relClusters = elem1.getClusters(); + ASSERT(relClusters.size() == 2, "element should have two related clusters"); + ASSERT(relClusters[0] == clusters[1], "first related cluster not as expected"); + ASSERT(relClusters[1] == clusters[0], "second related cluster not as expected"); + + auto elem2 = extRelColl[2]; + const auto& structs = elem2.getSomeStructs(); + ASSERT(structs.size() == 3, "element should have 3 struct vector members"); + ASSERT(structs[0].y == 0, "struct value not as expected"); + ASSERT(structs[1].y == iEvent, "struct value not as expected"); + ASSERT(structs[2].y == 2 * iEvent, "struct value not as expected"); +} + template int read_frames(const std::string& filename) { auto reader = ReaderT(); @@ -53,6 +106,8 @@ int read_frames(const std::string& filename) { auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); processEvent(otherFrame, i + 100, reader.currentFileVersion()); + // The other_events category also holds external collections + processExtensions(otherFrame, i + 100, reader.currentFileVersion()); } if (reader.readNextEntry("events")) { @@ -76,9 +131,12 @@ int read_frames(const std::string& filename) { auto otherFrame = podio::Frame(reader.readEntry("other_events", 4)); processEvent(otherFrame, 4 + 100, reader.currentFileVersion()); + processExtensions(otherFrame, 4 + 100, reader.currentFileVersion()); + // Jumping back also works auto previousFrame =
podio::Frame(reader.readEntry("other_events", 2)); processEvent(previousFrame, 2 + 100, reader.currentFileVersion()); + processExtensions(previousFrame, 2 + 100, reader.currentFileVersion()); // Trying to read a Frame that is not present returns a nullptr if (reader.readEntry("events", 10)) { diff --git a/tests/write_frame.h b/tests/write_frame.h index b957fa63b..85b50d7e3 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -13,6 +13,10 @@ #include "datamodel/ExampleWithOneRelationCollection.h" #include "datamodel/ExampleWithVectorMemberCollection.h" +#include "extension_model/ContainedTypeCollection.h" +#include "extension_model/ExternalComponentTypeCollection.h" +#include "extension_model/ExternalRelationTypeCollection.h" + #include "podio/Frame.h" #include "podio/UserDataCollection.h" @@ -326,6 +330,53 @@ auto createNamespaceRelationCollection(int i) { return retVal; } +auto createExtensionContainedCollection(int i) { + auto coll = extension::ContainedTypeCollection(); + // The ContainedType only has a polar vector + auto contElem = coll.create(); + contElem.setAVector({i * 123.f, 0.15f, 3.14f}); + + return coll; +} + +auto createExtensionExternalComponentCollection(int i) { + auto coll = extension::ExternalComponentTypeCollection(); + + // Set the upstream components only here + auto extCompElem = coll.create(); + extension::ExtComponent extComp{}; + extComp.aStruct.data.x = 42 * i; + extComp.nspStruct = ex2::NamespaceStruct{i, 2 * i}; + extCompElem.setAComponent(extComp); + SimpleStruct simpleS{}; + simpleS.p = {i, i - 2, i + 4, i * 8}; + extCompElem.setAStruct(simpleS); + + return coll; +} + +auto createExtensionExternalRelationCollection(int i, const ExampleHitCollection& hits, + const ExampleClusterCollection& clusters) { + auto coll = extension::ExternalRelationTypeCollection(); + + auto elem0 = coll.create(); + elem0.setWeight(i * 100.f); + elem0.setSingleHit(hits[0]); + + auto elem1 = coll.create(); + elem1.addToClusters(clusters[1]); + 
elem1.addToClusters(clusters[0]); + + auto elem2 = coll.create(); + for (int j = 0; j < 3; j++) { + auto s = SimpleStruct(); + s.y = j * i; + elem2.addToSomeStructs(s); + } + + return coll; +} + podio::Frame makeFrame(int iFrame) { podio::Frame frame{}; @@ -377,6 +428,10 @@ podio::Frame makeFrame(int iFrame) { emptySubsetColl.setSubsetCollection(); frame.put(std::move(emptySubsetColl), "emptySubsetColl"); + frame.put(createExtensionContainedCollection(iFrame), "extension_Contained"); + frame.put(createExtensionExternalComponentCollection(iFrame), "extension_ExternalComponent"); + frame.put(createExtensionExternalRelationCollection(iFrame, hits, clusters), "extension_ExternalRelation"); + return frame; } From 52064ba2217f45c27eb992f2b295d4be1508fa52 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 16 Jan 2023 17:18:32 +0100 Subject: [PATCH 033/100] Use sio utility functionality for writing also in legacy writer (#363) * Use defaultdict instead of essentially hand-rolling one * Fix cmake configure dependencies again after #343 * Move SIO utilities to existing private utils header * Use sio_utils also for legacy writer --- cmake/podioMacros.cmake | 4 +- include/podio/SIOWriter.h | 4 -- python/podio_class_generator.py | 5 +-- src/SIOFrameWriter.cc | 72 +------------------------------- src/SIOWriter.cc | 62 ++++----------------------- src/sioUtils.h | 74 ++++++++++++++++++++++++++++++++- 6 files changed, 87 insertions(+), 134 deletions(-) diff --git a/cmake/podioMacros.cmake b/cmake/podioMacros.cmake index 0cd41b4d6..0541317e6 100644 --- a/cmake/podioMacros.cmake +++ b/cmake/podioMacros.cmake @@ -182,8 +182,8 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR ${YAML_FILE} ${PODIO_TEMPLATES} ${podio_PYTHON_DIR}/podio_class_generator.py - ${podio_PYTHON_DIR}/generator_utils.py - ${podio_PYTHON_DIR}/podio_config_reader.py + ${podio_PYTHON_DIR}/podio/generator_utils.py + ${podio_PYTHON_DIR}/podio/podio_config_reader.py ) 
message(STATUS "Creating '${datamodel}' datamodel") diff --git a/include/podio/SIOWriter.h b/include/podio/SIOWriter.h index 3255681d9..af2a9ffff 100644 --- a/include/podio/SIOWriter.h +++ b/include/podio/SIOWriter.h @@ -6,9 +6,7 @@ #include "podio/SIOBlock.h" // SIO specific includes -#include #include -#include #include #include @@ -41,8 +39,6 @@ class SIOWriter { std::shared_ptr m_eventMetaData; sio::ofstream m_stream{}; - sio::buffer m_buffer{sio::mbyte}; - sio::buffer m_com_buffer{sio::mbyte}; bool m_firstEvent{true}; std::shared_ptr m_runMetaData; diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index f07d83c5e..7c6fc5b40 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -10,6 +10,7 @@ from enum import IntEnum from collections.abc import Mapping +from collections import defaultdict from itertools import zip_longest @@ -231,13 +232,11 @@ def _process_datatype(self, name, definition): def _preprocess_for_obj(self, datatype): """Do the preprocessing that is necessary for the Obj classes""" - fwd_declarations = {} + fwd_declarations = defaultdict(list) includes, includes_cc = set(), set() for relation in datatype['OneToOneRelations']: if relation.full_type != datatype['class'].full_type: - if relation.namespace not in fwd_declarations: - fwd_declarations[relation.namespace] = [] fwd_declarations[relation.namespace].append(relation.bare_type) includes_cc.add(self._build_include(relation)) diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc index b1c975037..f9ab6bdd9 100644 --- a/src/SIOFrameWriter.cc +++ b/src/SIOFrameWriter.cc @@ -5,82 +5,12 @@ #include "podio/GenericParameters.h" #include "podio/SIOBlock.h" -#include -#include -#include -#include +#include "sioUtils.h" #include namespace podio { -namespace sio_utils { - using StoreCollection = std::pair; - - std::shared_ptr createCollIDBlock(const std::vector& collections, - const podio::CollectionIDTable& collIdTable) { - // Need to make 
sure that the type names and subset collection bits are in - // the same order here! - std::vector types; - types.reserve(collections.size()); - std::vector subsetColl; - subsetColl.reserve(collections.size()); - std::vector names; - names.reserve(collections.size()); - std::vector ids; - ids.reserve(collections.size()); - - for (const auto& [name, coll] : collections) { - names.emplace_back(name); - ids.emplace_back(collIdTable.collectionID(name)); - types.emplace_back(coll->getValueTypeName()); - subsetColl.emplace_back(coll->isSubsetCollection()); - } - - return std::make_shared(std::move(names), std::move(ids), std::move(types), - std::move(subsetColl)); - } - - sio::block_list createBlocks(const std::vector& collections, - const podio::GenericParameters& parameters) { - sio::block_list blocks; - blocks.reserve(collections.size() + 1); // parameters + collections - - auto paramBlock = std::make_shared(); - // TODO: get rid of const_cast - paramBlock->metadata = const_cast(&parameters); - blocks.emplace_back(std::move(paramBlock)); - - for (const auto& [name, col] : collections) { - blocks.emplace_back(podio::SIOBlockFactory::instance().createBlock(col, name)); - } - - return blocks; - } - - // Write the passed record and return where it starts in the file - sio::ifstream::pos_type writeRecord(const sio::block_list& blocks, const std::string& recordName, - sio::ofstream& stream, std::size_t initBufferSize = sio::mbyte, - bool compress = true) { - auto buffer = sio::buffer{initBufferSize}; - auto recInfo = sio::api::write_record(recordName, buffer, blocks, 0); - - if (compress) { - // use zlib to compress the record into another buffer - sio::zlib_compression compressor; - compressor.set_level(6); // Z_DEFAULT_COMPRESSION==6 - auto comBuffer = sio::buffer{initBufferSize}; - sio::api::compress_record(recInfo, buffer, comBuffer, compressor); - - sio::api::write_record(stream, buffer.span(0, recInfo._header_length), comBuffer.span(), recInfo); - } else { -
sio::api::write_record(stream, buffer.span(), recInfo); - } - - return recInfo._file_start; - } -} // namespace sio_utils - SIOFrameWriter::SIOFrameWriter(const std::string& filename) { m_stream.open(filename, std::ios::binary); if (!m_stream.is_open()) { diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index 782ddbd58..cced3e28c 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -4,6 +4,8 @@ #include "podio/EventStore.h" #include "podio/SIOBlock.h" +#include "sioUtils.h" + // SIO specifc includes #include "sio/block.h" #include "sio/compression/zlib.h" @@ -41,23 +43,8 @@ void SIOWriter::writeEvent() { m_firstEvent = false; } - m_buffer.clear(); - m_com_buffer.clear(); - auto blocks = createBlocks(); - - // write the record to the sio buffer - auto rec_info = sio::api::write_record("event_record", m_buffer, blocks, 0); - - // use zlib to compress the record into another buffer - sio::zlib_compression compressor; - compressor.set_level(6); // Z_DEFAULT_COMPRESSION==6 - sio::api::compress_record(rec_info, m_buffer, m_com_buffer, compressor); - - // and now write record to the file ! 
- sio::api::write_record(m_stream, m_buffer.span(0, rec_info._header_length), m_com_buffer.span(), rec_info); - - m_tocRecord.addRecord("event_record", rec_info._file_start); + m_tocRecord.addRecord("event_record", sio_utils::writeRecord(blocks, "event_record", m_stream)); } sio::block_list SIOWriter::createBlocks() const { @@ -76,47 +63,26 @@ sio::block_list SIOWriter::createBlocks() const { } void SIOWriter::finish() { - m_buffer.clear(); - m_com_buffer.clear(); - sio::block_list blocks{}; blocks.push_back(m_runMetaData); - auto rec_info = sio::api::write_record(m_runMetaData->name(), m_buffer, blocks, 0); - sio::zlib_compression compressor; - compressor.set_level(6); - sio::api::compress_record(rec_info, m_buffer, m_com_buffer, compressor); - sio::api::write_record(m_stream, m_buffer.span(0, rec_info._header_length), m_com_buffer.span(), rec_info); - - m_tocRecord.addRecord(m_runMetaData->name(), rec_info._file_start); + m_tocRecord.addRecord(m_runMetaData->name(), sio_utils::writeRecord(blocks, m_runMetaData->name(), m_stream)); blocks.clear(); - m_buffer.clear(); - m_com_buffer.clear(); - blocks.push_back(m_collectionMetaData); - rec_info = sio::api::write_record(m_collectionMetaData->name(), m_buffer, blocks, 0); - sio::api::compress_record(rec_info, m_buffer, m_com_buffer, compressor); - sio::api::write_record(m_stream, m_buffer.span(0, rec_info._header_length), m_com_buffer.span(), rec_info); - - m_tocRecord.addRecord(m_collectionMetaData->name(), rec_info._file_start); + m_tocRecord.addRecord(m_collectionMetaData->name(), + sio_utils::writeRecord(blocks, m_collectionMetaData->name(), m_stream)); blocks.clear(); - m_buffer.clear(); - m_com_buffer.clear(); - auto tocRecordBlock = std::make_shared(); tocRecordBlock->record = &m_tocRecord; blocks.push_back(tocRecordBlock); - rec_info = sio::api::write_record(sio_helpers::SIOTocRecordName, m_buffer, blocks, 0); - sio::api::compress_record(rec_info, m_buffer, m_com_buffer, compressor); - 
sio::api::write_record(m_stream, m_buffer.span(0, rec_info._header_length), m_com_buffer.span(), rec_info); - + const auto tocStartPos = sio_utils::writeRecord(blocks, sio_helpers::SIOTocRecordName, m_stream); // Now that we know the position of the TOC Record, put this information // into a final marker that can be identified and interpreted when reading // again - uint64_t finalWords = (((uint64_t)sio_helpers::SIOTocMarker) << 32) | ((uint64_t)rec_info._file_start & 0xffffffff); + uint64_t finalWords = (((uint64_t)sio_helpers::SIOTocMarker) << 32) | ((uint64_t)tocStartPos & 0xffffffff); m_stream.write(reinterpret_cast(&finalWords), sizeof(finalWords)); m_stream.close(); @@ -140,21 +106,11 @@ void SIOWriter::registerForWrite(const std::string& name) { } void SIOWriter::writeCollectionIDTable() { - m_buffer.clear(); - m_com_buffer.clear(); sio::block_list blocks; blocks.emplace_back(std::make_shared(m_store)); blocks.emplace_back(std::make_shared(podio::version::build_version)); - auto rec_info = sio::api::write_record("file_metadata", m_buffer, blocks, 0); - - sio::zlib_compression compressor; - compressor.set_level(6); - sio::api::compress_record(rec_info, m_buffer, m_com_buffer, compressor); - - sio::api::write_record(m_stream, m_buffer.span(0, rec_info._header_length), m_com_buffer.span(), rec_info); - - m_tocRecord.addRecord("file_metadata", rec_info._file_start); + m_tocRecord.addRecord("file_metadata", sio_utils::writeRecord(blocks, "file_metadata", m_stream)); } } // namespace podio diff --git a/src/sioUtils.h b/src/sioUtils.h index 8401ac3cf..6e340d8fa 100644 --- a/src/sioUtils.h +++ b/src/sioUtils.h @@ -1,6 +1,10 @@ #ifndef PODIO_SIO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #define PODIO_SIO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy +#include "podio/CollectionBase.h" +#include "podio/GenericParameters.h" +#include "podio/SIOBlock.h" + #include #include #include @@ -9,7 +13,7 @@ namespace 
podio { namespace sio_utils { - // Read the record into a buffer and potentially uncompress it + /// Read the record into a buffer and potentially uncompress it inline std::pair readRecord(sio::ifstream& stream, bool decompress = true, std::size_t initBufferSize = sio::mbyte) { sio::record_info recInfo; @@ -27,6 +31,74 @@ namespace sio_utils { return std::make_pair(std::move(recBuffer), recInfo); } + + using StoreCollection = std::pair; + + /// Create the collection ID block from the passed collections + inline std::shared_ptr createCollIDBlock(const std::vector& collections, + const podio::CollectionIDTable& collIdTable) { + // Need to make sure that the type names and subset collection bits are in + // the same order here! + std::vector types; + types.reserve(collections.size()); + std::vector subsetColl; + subsetColl.reserve(collections.size()); + std::vector names; + names.reserve(collections.size()); + std::vector ids; + ids.reserve(collections.size()); + + for (const auto& [name, coll] : collections) { + names.emplace_back(name); + ids.emplace_back(collIdTable.collectionID(name)); + types.emplace_back(coll->getValueTypeName()); + subsetColl.emplace_back(coll->isSubsetCollection()); + } + + return std::make_shared(std::move(names), std::move(ids), std::move(types), + std::move(subsetColl)); + } + + /// Create all blocks to store the passed collections and parameters into a record + inline sio::block_list createBlocks(const std::vector& collections, + const podio::GenericParameters& parameters) { + sio::block_list blocks; + blocks.reserve(collections.size() + 1); // parameters + collections + + auto paramBlock = std::make_shared(); + // TODO: get rid of const_cast + paramBlock->metadata = const_cast(&parameters); + blocks.emplace_back(std::move(paramBlock)); + + for (const auto& [name, col] : collections) { + blocks.emplace_back(podio::SIOBlockFactory::instance().createBlock(col, name)); + } + + return blocks; + } + + /// Write the passed record and return where it
starts in the file + inline sio::ifstream::pos_type writeRecord(const sio::block_list& blocks, const std::string& recordName, + sio::ofstream& stream, std::size_t initBufferSize = sio::mbyte, + bool compress = true) { + auto buffer = sio::buffer{initBufferSize}; + auto recInfo = sio::api::write_record(recordName, buffer, blocks, 0); + + if (compress) { + // use zlib to compress the record into another buffer + sio::zlib_compression compressor; + compressor.set_level(6); // Z_DEFAULT_COMPRESSION==6 + auto comBuffer = sio::buffer{initBufferSize}; + sio::api::compress_record(recInfo, buffer, comBuffer, compressor); + + sio::api::write_record(stream, buffer.span(0, recInfo._header_length), comBuffer.span(), recInfo); + } else { + sio::api::write_record(stream, buffer.span(), recInfo); + } + + return recInfo._file_start; + } + } // namespace sio_utils } // namespace podio From 3f6c7253e89424a67320b606ab408f92daa5b0dc Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Thu, 26 Jan 2023 10:36:44 +0100 Subject: [PATCH 034/100] Add var and check to relax required match of versions (#368) --- CMakeLists.txt | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b83e56ce1..e884a384d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,8 +65,9 @@ option(CLANG_TIDY "Run clang-tidy after compilation." OFF) ADD_CLANG_TIDY() #--- Declare options ----------------------------------------------------------- -option(CREATE_DOC "Whether or not to create doxygen doc target." OFF) -option(ENABLE_SIO "Build SIO I/O support" OFF) +option(CREATE_DOC "Whether or not to create doxygen doc target." 
OFF) +option(ENABLE_SIO "Build SIO I/O support" OFF) +option(PODIO_RELAX_PYVER "Do not require exact python version match with ROOT" OFF) #--- Declare ROOT dependency --------------------------------------------------- @@ -90,10 +91,23 @@ IF((TARGET ROOT::PyROOT OR TARGET ROOT::ROOTTPython) AND ${ROOT_VERSION} VERSION ENDIF() message( STATUS "Python version used for building ROOT ${ROOT_PYTHON_VERSION}" ) message( STATUS "Required python version ${REQUIRE_PYTHON_VERSION}") - FIND_PACKAGE(Python ${REQUIRE_PYTHON_VERSION} EXACT REQUIRED COMPONENTS Development Interpreter) -ELSE() - FIND_PACKAGE(Python COMPONENTS Development Interpreter) -ENDIF() + + if(NOT PODIO_RELAX_PYVER) + find_package(Python ${REQUIRE_PYTHON_VERSION} EXACT REQUIRED COMPONENTS Development Interpreter) + else() + find_package(Python ${REQUIRE_PYTHON_VERSION} REQUIRED COMPONENTS Development Interpreter) + string(REPLACE "." ";" _root_pyver_tuple ${REQUIRE_PYTHON_VERSION}) + list(GET _root_pyver_tuple 0 _root_pyver_major) + list(GET _root_pyver_tuple 1 _root_pyver_minor) + if(NOT "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}" VERSION_EQUAL "${_root_pyver_major}.${_root_pyver_minor}") + message(FATAL_ERROR "There is a mismatch between the major and minor versions in Python" + " (found ${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}) and ROOT, compiled with " + "Python ${_root_pyver_major}.${_root_pyver_minor}") + endif() + endif() +else() + find_package(Python COMPONENTS Development Interpreter) +endif() # ROOT only sets usage requirements from 6.14, so for # earlier versions need to hack in INTERFACE_INCLUDE_DIRECTORIES From ba1594b1b9d4825055149cfbe2ca80a04725cda4 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Thu, 2 Feb 2023 13:28:32 +0100 Subject: [PATCH 035/100] Make workflows not trigger twice on pushes to PRs (#364) * Do not trigger twice on PRs, add workflow_dispatch to run manually * Add comment to run the workflows manually --- 
.github/workflows/edm4hep.yaml | 7 ++++++- .github/workflows/key4hep.yml | 8 +++++++- .github/workflows/pre-commit.yml | 7 ++++++- .github/workflows/sanitizers.yaml | 8 +++++++- .github/workflows/test.yml | 8 +++++++- .github/workflows/ubuntu.yml | 7 ++++++- README.md | 7 +++++++ 7 files changed, 46 insertions(+), 6 deletions(-) diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml index 3ee886439..827a513bf 100644 --- a/.github/workflows/edm4hep.yaml +++ b/.github/workflows/edm4hep.yaml @@ -1,6 +1,11 @@ name: edm4hep -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: + workflow_dispatch: jobs: build-and-test: diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index dec780636..824d135b8 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -1,6 +1,12 @@ name: key4hep -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: + workflow_dispatch: + jobs: build-and-test: runs-on: ubuntu-latest diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 187432d38..cc4eeba90 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,6 +1,11 @@ name: pre-commit -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: + workflow_dispatch: jobs: pre-commit: diff --git a/.github/workflows/sanitizers.yaml b/.github/workflows/sanitizers.yaml index e15f2eade..f730c287e 100644 --- a/.github/workflows/sanitizers.yaml +++ b/.github/workflows/sanitizers.yaml @@ -1,6 +1,12 @@ name: sanitizers -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: + workflow_dispatch: + jobs: build-and-test: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 21ab4d56c..24849718b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,12 @@ name: linux -on: [push, pull_request] +on: + push: + branches: + - 
master + pull_request: + workflow_dispatch: + jobs: build-and-test: runs-on: ubuntu-latest diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index d7521bc53..8aea735f3 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -1,6 +1,11 @@ name: ubuntu -on: [push, pull_request] +on: + push: + branches: + - master + pull_request: + workflow_dispatch: jobs: test: diff --git a/README.md b/README.md index 2215a0c64..b78b9e3aa 100755 --- a/README.md +++ b/README.md @@ -164,6 +164,13 @@ After compilation you can run rudimentary tests with make test +## Running workflows +To run workflows manually (for example, when working on your own fork) go to +`Actions` then click on the workflow that you want to run (for example +`edm4hep`). Then if the workflow has the `workflow_dispatch` trigger you will be +able to run it by clicking `Run workflow` and selecting on which branch it will +run. + ## Advanced build topics It is possible to instrument the complete podio build with sanitizers using the From 811aca97d46a5ba622af0ff7a914aeef12a006cd Mon Sep 17 00:00:00 2001 From: Nathan Brei Date: Mon, 6 Feb 2023 09:35:05 -0500 Subject: [PATCH 036/100] Fix multiple-definition error in Frame.h (#369) Frame::Frame(), get(), put(), and putParameter() methods had (non-templated) definitions in the header file. These have now been specified as inline. 
--- .gitignore | 4 ++++ include/podio/Frame.h | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index a838c3f20..cb2170b29 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,7 @@ spack* # Populated by cmake before build /include/podio/podioVersion.h /python/podio/__init__.py + +# CLion artifacts +.idea/ +cmake-build*/ \ No newline at end of file diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 5f2357dae..41575032d 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -205,7 +205,7 @@ class Frame { /** Add a string value to the parameters of the Frame by copying it. Dedicated * overload for enabling the on-the-fly conversion on the string literals. */ - void putParameter(const std::string& key, std::string value) { + inline void putParameter(const std::string& key, std::string value) { putParameter(key, std::move(value)); } @@ -213,7 +213,7 @@ class Frame { * Dedicated overload for enabling on-the-fly conversions of initializer_list * of string literals. 
*/ - void putParameter(const std::string& key, std::vector values) { + inline void putParameter(const std::string& key, std::vector values) { putParameter>(key, std::move(values)); } @@ -269,7 +269,7 @@ class Frame { // implementations below -Frame::Frame() : Frame(std::make_unique()) { +inline Frame::Frame() : Frame(std::make_unique()) { } template @@ -291,11 +291,11 @@ const CollT& Frame::get(const std::string& name) const { return emptyColl; } -const podio::CollectionBase* Frame::get(const std::string& name) const { +inline const podio::CollectionBase* Frame::get(const std::string& name) const { return m_self->get(name); } -void Frame::put(std::unique_ptr coll, const std::string& name) { +inline void Frame::put(std::unique_ptr coll, const std::string& name) { const auto* retColl = m_self->put(std::move(coll), name); if (!retColl) { // TODO: Handle collisions From a6d1c3d782a5e6ca5f4ed0d70e20670693cb8ae8 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Mon, 13 Feb 2023 17:04:50 +0100 Subject: [PATCH 037/100] Adding ID to the short podio-dump output (#373) * Adding ID to the short output --- tools/podio-dump | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/podio-dump b/tools/podio-dump index 0834053c9..efb5dfc18 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -38,7 +38,7 @@ def print_frame(frame, cat_name, ientry, detailed): print('Collections:') if not detailed: - print(f'{"Name":<30} {"Type":<40} {"Size":<10}') + print(f'{"Name":<38} {"ID":<4} {"Type":<32} {"Size":<10}') print('-' * 82) # Print collections @@ -49,7 +49,7 @@ def print_frame(frame, cat_name, ientry, detailed): coll.print() print(flush=True) else: - print(f'{name:<30} {coll.getValueTypeName():<40} {len(coll):<10}') + print(f'{name:<38} {coll.getID():<4} {coll.getValueTypeName():<32} {len(coll):<10}') # And then parameters print('\nParameters:', flush=True) From 9cadbd1e05d41f8db83b1dc97c1afb1fd379e8da Mon Sep 17 
00:00:00 2001 From: Thomas Madlener Date: Tue, 14 Feb 2023 09:57:20 +0100 Subject: [PATCH 038/100] Fix version preprocessor macro and make it usable (#375) --- podioVersion.in.h | 4 ++-- tests/unittest.cpp | 9 +++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/podioVersion.in.h b/podioVersion.in.h index 69c1d073b..717cce17a 100644 --- a/podioVersion.in.h +++ b/podioVersion.in.h @@ -13,7 +13,7 @@ /// Define a version to be used in podio. #define PODIO_VERSION(major, minor, patch) \ - (((unsigned long)(major) << 32) | ((unsigned long)(minor) << 16) | ((unsigned long)(patch))) + ((UINT64_C(major) << 32) | (UINT64_C(minor) << 16) | UINT64_C(patch)) /// Get the major version from a preprocessor defined version #define PODIO_MAJOR_VERSION(v) (((v) & (-1UL >> 16)) >> 32) /// Get the minor version from a preprocessor defined version @@ -22,10 +22,10 @@ #define PODIO_PATCH_VERSION(v) ((v) & (-1UL >> 48)) // Some helper constants that are populated by the cmake configure step -#define podio_VERSION @podio_VERSION@ #define podio_VERSION_MAJOR @podio_VERSION_MAJOR@ #define podio_VERSION_MINOR @podio_VERSION_MINOR@ #define podio_VERSION_PATCH @podio_VERSION_PATCH@ +#define podio_VERSION PODIO_VERSION(podio_VERSION_MAJOR, podio_VERSION_MINOR, podio_VERSION_PATCH) /// The encoded version with which podio has been built #define PODIO_BUILD_VERSION PODIO_VERSION(podio_VERSION_MAJOR, podio_VERSION_MINOR, podio_VERSION_PATCH) diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 218c58b17..dd93cced6 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -883,8 +883,13 @@ TEST_CASE("Version tests", "[versioning]") { TEST_CASE("Preprocessor version tests", "[versioning]") { SECTION("Basic functionality") { using namespace podio::version; - // Check that preprocessor comparisons work - STATIC_REQUIRE(PODIO_BUILD_VERSION == PODIO_VERSION(build_version.major, build_version.minor, build_version.patch)); + // Check that preprocessor comparisons work by 
actually invoking the + // preprocessor +#if PODIO_BUILD_VERSION == PODIO_VERSION(podio_VERSION_MAJOR, podio_VERSION_MINOR, podio_VERSION_PATCH) + STATIC_REQUIRE(true); +#else + STATIC_REQUIRE(false); +#endif // Make sure that we can actually decode 64 bit versions STATIC_REQUIRE(decode_version(PODIO_BUILD_VERSION) == build_version); From a108aeb2b3bc387dc9c303c96b06adf383c8d4dc Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 21 Feb 2023 11:14:36 +0100 Subject: [PATCH 039/100] Fix tests without SIO (#376) * Raise errors when the corresponding library is not found Co-authored-by: jmcarcell --- python/podio/reading.py | 4 +++- python/podio/sio_io.py | 7 +++++-- python/podio/test_ReaderSio.py | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/podio/reading.py b/python/podio/reading.py index f853e1795..b4f6401f5 100644 --- a/python/podio/reading.py +++ b/python/podio/reading.py @@ -18,7 +18,9 @@ def _is_frame_sio_file(filename): except ImportError: def _is_frame_sio_file(filename): """Stub raising a ValueError""" - raise ValueError('podio has not been built with SIO support, which is necessary to read this file') + raise ValueError('podio has not been built with SIO support, ' + 'which is necessary to read this file, ' + 'or there is a version mismatch') def _is_frame_root_file(filename): diff --git a/python/podio/sio_io.py b/python/podio/sio_io.py index 7559f16cb..ae695fbb7 100644 --- a/python/podio/sio_io.py +++ b/python/podio/sio_io.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 """Python module for reading sio files containing podio Frames""" -from podio.base_reader import BaseReaderMixin +from podio.base_reader import BaseReaderMixin # pylint: disable=wrong-import-position from ROOT import gSystem -gSystem.Load('libpodioSioIO') # noqa: 402 +ret = gSystem.Load('libpodioSioIO') # noqa: 402 +# Return values: -1 when it doesn't exist and -2 when there is a version mismatch +if ret < 
0: + raise ImportError('Error when importing libpodioSioIO') from ROOT import podio # noqa: 402 # pylint: disable=wrong-import-position Writer = podio.SIOFrameWriter diff --git a/python/podio/test_ReaderSio.py b/python/podio/test_ReaderSio.py index 3d1f35eae..0429aa5d7 100644 --- a/python/podio/test_ReaderSio.py +++ b/python/podio/test_ReaderSio.py @@ -3,7 +3,6 @@ import unittest -from podio.sio_io import Reader, LegacyReader from podio.test_Reader import ReaderTestCaseMixin, LegacyReaderTestCaseMixin from podio.test_utils import SKIP_SIO_TESTS @@ -13,6 +12,7 @@ class SioReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): """Test cases for root input files""" def setUp(self): """Setup the corresponding reader""" + from podio.sio_io import Reader # pylint: disable=import-outside-toplevel self.reader = Reader('example_frame.sio') @@ -21,4 +21,5 @@ class SIOLegacyReaderTestCase(LegacyReaderTestCaseMixin, unittest.TestCase): """Test cases for the legacy root input files and reader.""" def setUp(self): """Setup a reader, reading from the example files""" + from podio.sio_io import LegacyReader # pylint: disable=import-outside-toplevel self.reader = LegacyReader('example.sio') From 858fa01831acb61369e18a545e038df882199f53 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Wed, 22 Feb 2023 11:45:31 +0100 Subject: [PATCH 040/100] Add a visualization tool (#377) * Initial commit that works * Add a legend and a few fixes * Exchange blue and red * Fix filename * Fix pylint issues * Fix linter issues and add some text when saving a file * Address comments; add grouping and change default format to svg * Add information in the README and delete an useless argument * Fix linter issues * Simplify code a bit since I wasn't able to align the legend * Fix change in the README too * Remove renderer * Add comment in the README and check for graphviz being installed * Exit if it doesn't exist * Change exit() -> sys.exit() * 
Remove trailing whitespace --- README.md | 26 ++++++++++++ tools/podio-vis | 109 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100755 tools/podio-vis diff --git a/README.md b/README.md index b78b9e3aa..97f7a51a0 100755 --- a/README.md +++ b/README.md @@ -66,6 +66,8 @@ In order for the `yaml` module to be working it might also be necessary to insta Check that you can now import the `yaml` and `jinja2` modules in python. +Optionally, `graphviz` is also required for the visualization tool `podio-vis`. + ## Preparing the environment Full use of PODIO requires you to set the `PODIO` environment variable @@ -183,3 +185,27 @@ smoother development experience with sanitizers enabled, these failing tests are labelled (e.g. `[ASAN-FAIL]` or `[THREAD-FAIL]`) and will not be run by default if the corresponding sanitizer is enabled. It is possible to force all tests to be run via the `FORCE_RUN_ALL_TESTS` cmake option. + +## Model visualization + +There is a tool to generate a diagram of the relationships between the elements +in a model. To generate a diagram run `python python/tools/podio-vis model.yaml` +and use `--help` for checking the possible options. In particular there is the +option `--graph-conf` that can be used to pass a configuration file defining +groups that will be clustered together in the diagram.
The +syntax is +``` +group label: + - datatype1 + - datatype2 +another group label: + - datatype3 + - datatype4 +``` + +Additionally, it is possible to remove from the diagram any +data type (let's call it `removed_datatype`) by adding to this configuration file: +``` +Filter: + - removed_datatype +``` diff --git a/tools/podio-vis b/tools/podio-vis new file mode 100755 index 000000000..032feb96e --- /dev/null +++ b/tools/podio-vis @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +'''Tool to transform data model descriptions in YAML to a graph that can be visualized''' + +import sys +import argparse +import yaml +from podio.podio_config_reader import PodioConfigReader +try: + from graphviz import Digraph +except ImportError: + print('graphviz is not installed. please run pip install graphviz') + sys.exit(1) + + +class ModelToGraphviz: + """Class to transform a data model description into a graphical representation""" + + def __init__(self, yamlfile, dot, fmt, filename, graph_conf): + self.yamlfile = yamlfile + self.use_dot = dot + self.datamodel = PodioConfigReader.read(yamlfile, 'podio') + self.graph = Digraph(node_attr={'shape': 'box'}) + self.graph.attr(rankdir='RL', size='8,5') + self.fmt = fmt + self.filename = filename + self.graph_conf = {} + self.remove = set() + if graph_conf: + with open(graph_conf, encoding='utf8') as inp: + self.graph_conf = yaml.safe_load(inp) + if 'Filter' in self.graph_conf: + self.remove = set(self.graph_conf['Filter']) + + def make_viz(self): + '''Make the graph and render it in the chosen format''' + + # Make the grouped nodes first + # It doesn't matter if they are remade later so we don't need + # to check for that + for i, (label, group) in enumerate(self.graph_conf.items()): + with self.graph.subgraph(name=f'cluster{i+1}') as subgraph: + subgraph.attr(label=label) + for name in group: + if name in self.remove: + continue + subgraph.node(name.replace('::', '_'), label=name) + + with_association = False + for name, datatype in
self.datamodel.datatypes.items(): + if name in self.remove: + continue + if 'Association' in name: + with_association = True + self.graph.edge(datatype['OneToOneRelations'][0].full_type.replace('::', '_'), + datatype['OneToOneRelations'][1].full_type.replace('::', '_'), + label=name.replace('edm4hep::', ''), color='black', dir='both') + continue + + compatible_name = name.replace('::', '_') # graphviz gets confused with C++ '::' and formatting strings + self.graph.node(compatible_name, label=name) + self.graph.attr('edge', color='blue') + for rel in datatype["OneToOneRelations"]: + if rel.full_type in self.remove: + continue + compatible_type = rel.full_type.replace('::', '_') + self.graph.edge(compatible_name, compatible_type) + self.graph.attr('edge', color='red') + for rel in datatype["OneToManyRelations"]: + if rel.full_type in self.remove: + continue + compatible_type = rel.full_type.replace('::', '_') + self.graph.edge(compatible_name, compatible_type) + + with self.graph.subgraph(name='cluster0') as subg: + subg.attr('node', shape='plaintext') + subg.node('l1', '') + subg.node('r1', 'One to One Relation') + subg.edge('l1', 'r1', color='blue') + subg.node('l2', '') + subg.node('r2', 'One to Many Relation') + subg.edge('l2', 'r2', color='red') + if with_association: + subg.node('r3', 'Association') + subg.node('l3', '') + subg.edge('l3', 'r3', color='black', dir='both') + + if self.use_dot: + self.graph.save() + else: + print(f'Saving file {self.filename} and {self.filename}.{self.fmt}') + self.graph.render(filename=self.filename, format=self.fmt) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Given a description yaml file this script generates ' + 'a visualization in the target directory') + + parser.add_argument('description', help='yaml file describing the datamodel') + parser.add_argument('-d', '--dot', action='store_true', default=False, + help='just write the dot file') + parser.add_argument('--fmt', default='svg', 
help='Which format to use for saving the file') + parser.add_argument('--filename', default='gv', help='Which filename to use for the output') + parser.add_argument('--graph-conf', help='Configuration file for defining groups') + + args = parser.parse_args() + + vis = ModelToGraphviz(args.description, args.dot, fmt=args.fmt, + filename=args.filename, graph_conf=args.graph_conf) + vis.make_viz() From 8488574fb8b948a53f618887d13040e79e8f9d24 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 27 Feb 2023 11:00:47 +0100 Subject: [PATCH 041/100] Expose more of the GenericParameters interface in the Frame (#380) * Add more parameter functionality to Frame interface * Mark getGenericParametersForWrite as deprecated Now that getParameters exists * Mark inline defined methods as inline --- include/podio/Frame.h | 21 +++++++++++++++++---- python/podio/frame.py | 7 ++----- src/ROOTFrameWriter.cc | 5 ++--- src/SIOFrameWriter.cc | 2 +- tests/frame.cpp | 6 ++++++ 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 41575032d..4943c4160 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -198,7 +198,7 @@ class Frame { * Copy the value into the internal store */ template > - void putParameter(const std::string& key, T value) { + inline void putParameter(const std::string& key, T value) { m_self->parameters().setValue(key, value); } @@ -221,7 +221,7 @@ class Frame { * catching on-the-fly conversions of initializer_lists of values. */ template >> - void putParameter(const std::string& key, std::initializer_list&& values) { + inline void putParameter(const std::string& key, std::initializer_list&& values) { putParameter>(key, std::move(values)); } @@ -229,10 +229,23 @@ class Frame { * either by a const reference or a value depending on the desired type.
*/ template > - podio::GenericDataReturnType getParameter(const std::string& key) const { + inline podio::GenericDataReturnType getParameter(const std::string& key) const { return m_self->parameters().getValue(key); } + /** Get all parameters that are stored in this Frame + */ + inline const podio::GenericParameters& getParameters() const { + return m_self->parameters(); + } + + /** Get the keys of all stored parameters for a given type + */ + template > + inline std::vector getParameterKeys() const { + return m_self->parameters().getKeys(); + } + /** Get all **currently** available collections (including potentially * unpacked ones from raw data) */ @@ -246,7 +259,7 @@ class Frame { /** * Get the GenericParameters for writing */ - const podio::GenericParameters& getGenericParametersForWrite() const { + [[deprecated("use getParameters instead")]] const podio::GenericParameters& getGenericParametersForWrite() const { return m_self->parameters(); } diff --git a/python/podio/frame.py b/python/podio/frame.py index 7f4433959..45ccf7194 100644 --- a/python/podio/frame.py +++ b/python/podio/frame.py @@ -167,7 +167,7 @@ def get_parameters(self): podio.GenericParameters: The stored generic parameters """ # Going via the not entirely inteded way here - return self._frame.getGenericParametersForWrite() + return self._frame.getParameters() def get_param_info(self, name): """Get the parameter type information stored under the given name. @@ -197,13 +197,10 @@ def get_param_info(self, name): def _init_param_keys(self): """Initialize the param keys dict for easier lookup of the available parameters. 
- NOTE: This depends on a "side channel" that is usually reserved for the - writers but is currently still in the public interface of the Frame - Returns: dict: A dictionary mapping each key to the corresponding c++ type """ - params = self._frame.getGenericParametersForWrite() # this is the iffy bit + params = self._frame.getParameters() keys_dict = {} for par_type in SUPPORTED_PARAMETER_TYPES: keys = params.getKeys[par_type]() diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 39708a096..d98d6763a 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -40,11 +40,10 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c // We will at least have a parameters branch, even if there are no // collections if (catInfo.branches.empty()) { - initBranches(catInfo, collections, const_cast(frame.getGenericParametersForWrite())); + initBranches(catInfo, collections, const_cast(frame.getParameters())); } else { - resetBranches(catInfo.branches, collections, - &const_cast(frame.getGenericParametersForWrite())); + resetBranches(catInfo.branches, collections, &const_cast(frame.getParameters())); } catInfo.tree->Fill(); diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc index f9ab6bdd9..f33bdbccc 100644 --- a/src/SIOFrameWriter.cc +++ b/src/SIOFrameWriter.cc @@ -44,7 +44,7 @@ void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& ca tableBlocks.emplace_back(sio_utils::createCollIDBlock(collections, frame.getCollectionIDTableForWrite())); m_tocRecord.addRecord(category, sio_utils::writeRecord(tableBlocks, category + "_HEADER", m_stream)); - const auto blocks = sio_utils::createBlocks(collections, frame.getGenericParametersForWrite()); + const auto blocks = sio_utils::createBlocks(collections, frame.getParameters()); sio_utils::writeRecord(blocks, category, m_stream); } diff --git a/tests/frame.cpp b/tests/frame.cpp index 38dd4e3c6..4d36909f1 100644 --- a/tests/frame.cpp +++ b/tests/frame.cpp @@ 
-40,6 +40,12 @@ TEST_CASE("Frame parameters", "[frame][basics]") { REQUIRE(strings[0] == "one"); REQUIRE(strings[1] == "two"); REQUIRE(strings[2] == "three"); + + const auto stringKeys = event.getParameterKeys(); + REQUIRE(stringKeys.size() == 2); + // Can't rely on an insertion order here + REQUIRE(std::find(stringKeys.begin(), stringKeys.end(), "aString") != stringKeys.end()); + REQUIRE(std::find(stringKeys.begin(), stringKeys.end(), "someStrings") != stringKeys.end()); } // NOTE: Due to the extremly small tasks that are done in these tests, they will From 4266e0bea4a0d95336dafa9eeabac8e1c3aba6f0 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 1 Mar 2023 09:40:02 +0100 Subject: [PATCH 042/100] Mark EventStore based I/O as deprecated (#378) * Add deprecation warning for EventStore based I/O * Add deprecation warning for EventStore python bindings * Make CI pass while deprecation warnings are in --- .github/workflows/edm4hep.yaml | 2 +- .github/workflows/key4hep.yml | 2 +- .github/workflows/test.yml | 2 +- .github/workflows/ubuntu.yml | 2 +- .pre-commit-config.yaml | 2 +- include/podio/ASCIIWriter.h | 3 ++- include/podio/EventStore.h | 3 ++- include/podio/IMetaDataProvider.h | 3 ++- include/podio/IReader.h | 3 ++- include/podio/PythonEventStore.h | 3 ++- include/podio/ROOTWriter.h | 3 ++- include/podio/SIOWriter.h | 3 ++- include/podio/TimedWriter.h | 3 ++- include/podio/utilities/Deprecated.h | 7 +++++++ python/podio/EventStore.py | 5 ++++- 15 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 include/podio/utilities/Deprecated.h diff --git a/.github/workflows/edm4hep.yaml b/.github/workflows/edm4hep.yaml index 827a513bf..188c56f64 100644 --- a/.github/workflows/edm4hep.yaml +++ b/.github/workflows/edm4hep.yaml @@ -46,7 +46,7 @@ jobs: cmake -DENABLE_SIO=ON \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror 
-Wno-error=deprecated-declarations " \ -DUSE_EXTERNAL_CATCH2=ON \ -DBUILD_TESTING=OFF\ -G Ninja .. diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index 824d135b8..f831301fe 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -29,7 +29,7 @@ jobs: cmake -DENABLE_SIO=ON \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ -DUSE_EXTERNAL_CATCH2=ON \ -G Ninja .. echo "::endgroup::" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 24849718b..a57fa321a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,7 @@ jobs: cmake -DENABLE_SIO=${{ matrix.sio }} \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ -DUSE_EXTERNAL_CATCH2=OFF \ -G Ninja .. echo "::endgroup::" diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 8aea735f3..16ae05bee 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -29,7 +29,7 @@ jobs: cmake -DENABLE_SIO=${{ matrix.sio }} \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ - -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror " \ + -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ -DUSE_EXTERNAL_CATCH2=OFF \ -DPODIO_SET_RPATH=ON \ -G Ninja .. 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e0928466..48edb8d6b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ repos: hooks: - id: clang-tidy name: clang-tidy - entry: clang-tidy -warnings-as-errors='*' -p compile_commands.json + entry: clang-tidy -warnings-as-errors='*,-clang-diagnostic-deprecated-declarations' -p compile_commands.json types: [c++] exclude: (tests/(datamodel|src)/.*(h|cc)|podioVersion.in.h) language: system diff --git a/include/podio/ASCIIWriter.h b/include/podio/ASCIIWriter.h index 9916e6819..54fd2040b 100644 --- a/include/podio/ASCIIWriter.h +++ b/include/podio/ASCIIWriter.h @@ -2,6 +2,7 @@ #define PODIO_ASCIIWRITER_H #include "podio/EventStore.h" +#include "podio/utilities/Deprecated.h" #include #include @@ -30,7 +31,7 @@ struct ColWriter : public ColWriterBase { typedef std::map FunMap; -class ASCIIWriter { +class DEPR_EVTSTORE ASCIIWriter { public: ASCIIWriter(const std::string& filename, EventStore* store); diff --git a/include/podio/EventStore.h b/include/podio/EventStore.h index e84b7e57e..f8ee70dc4 100644 --- a/include/podio/EventStore.h +++ b/include/podio/EventStore.h @@ -13,6 +13,7 @@ #include "podio/GenericParameters.h" #include "podio/ICollectionProvider.h" #include "podio/IMetaDataProvider.h" +#include "podio/utilities/Deprecated.h" /** This is an *example* event store @@ -33,7 +34,7 @@ class IReader; typedef std::map RunMDMap; typedef std::map ColMDMap; -class EventStore : public ICollectionProvider, public IMetaDataProvider { +class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataProvider { public: /// Make non-copyable EventStore(const EventStore&) = delete; diff --git a/include/podio/IMetaDataProvider.h b/include/podio/IMetaDataProvider.h index ca735b7ec..20f1941b0 100644 --- a/include/podio/IMetaDataProvider.h +++ b/include/podio/IMetaDataProvider.h @@ -2,6 +2,7 @@ #define PODIO_IMETADATAPROVIDER_H #include "podio/GenericParameters.h" +#include 
"podio/utilities/Deprecated.h" namespace podio { @@ -9,7 +10,7 @@ namespace podio { * @author F. Gaede, DESY * @date Apr 2020 */ -class IMetaDataProvider { +class DEPR_EVTSTORE IMetaDataProvider { public: /// destructor diff --git a/include/podio/IReader.h b/include/podio/IReader.h index 12fcb6944..13553bf87 100644 --- a/include/podio/IReader.h +++ b/include/podio/IReader.h @@ -2,6 +2,7 @@ #define PODIO_IREADER_H #include "podio/podioVersion.h" +#include "podio/utilities/Deprecated.h" #include #include @@ -22,7 +23,7 @@ class CollectionBase; class CollectionIDTable; class GenericParameters; -class IReader { +class DEPR_EVTSTORE IReader { public: virtual ~IReader() = default; /// Read Collection of given name diff --git a/include/podio/PythonEventStore.h b/include/podio/PythonEventStore.h index 5ab68f15e..8ae4d5140 100644 --- a/include/podio/PythonEventStore.h +++ b/include/podio/PythonEventStore.h @@ -4,12 +4,13 @@ #include "podio/EventStore.h" #include "podio/GenericParameters.h" #include "podio/IReader.h" +#include "podio/utilities/Deprecated.h" #include namespace podio { -class PythonEventStore { +class DEPR_EVTSTORE PythonEventStore { public: /// constructor from filename PythonEventStore(const char* filename); diff --git a/include/podio/ROOTWriter.h b/include/podio/ROOTWriter.h index 6ba79d06e..6751809ae 100644 --- a/include/podio/ROOTWriter.h +++ b/include/podio/ROOTWriter.h @@ -4,6 +4,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionBranches.h" #include "podio/EventStore.h" +#include "podio/utilities/Deprecated.h" #include "TBranch.h" @@ -18,7 +19,7 @@ class TFile; class TTree; namespace podio { -class ROOTWriter { +class DEPR_EVTSTORE ROOTWriter { public: ROOTWriter(const std::string& filename, EventStore* store); diff --git a/include/podio/SIOWriter.h b/include/podio/SIOWriter.h index af2a9ffff..29ce0bc7d 100644 --- a/include/podio/SIOWriter.h +++ b/include/podio/SIOWriter.h @@ -4,6 +4,7 @@ #include "podio/CollectionBase.h" #include 
"podio/EventStore.h" #include "podio/SIOBlock.h" +#include "podio/utilities/Deprecated.h" // SIO specific includes #include @@ -15,7 +16,7 @@ // forward declarations namespace podio { -class SIOWriter { +class DEPR_EVTSTORE SIOWriter { public: SIOWriter(const std::string& filename, EventStore* store); diff --git a/include/podio/TimedWriter.h b/include/podio/TimedWriter.h index 32660408c..90e154273 100644 --- a/include/podio/TimedWriter.h +++ b/include/podio/TimedWriter.h @@ -3,6 +3,7 @@ #include "podio/BenchmarkRecorder.h" #include "podio/BenchmarkUtil.h" +#include "podio/utilities/Deprecated.h" #include #include @@ -10,7 +11,7 @@ namespace podio { template -class TimedWriter { +class DEPR_EVTSTORE TimedWriter { using ClockT = benchmark::ClockT; public: diff --git a/include/podio/utilities/Deprecated.h b/include/podio/utilities/Deprecated.h new file mode 100644 index 000000000..566bca814 --- /dev/null +++ b/include/podio/utilities/Deprecated.h @@ -0,0 +1,7 @@ +#ifndef PODIO_UTILITIES_DEPRECATED_H +#define PODIO_UTILITIES_DEPRECATED_H + +#define DEPR_EVTSTORE \ + [[deprecated("The EventStore based I/O model is deprecated and will be removed. Switch to the Frame based model.")]] + +#endif // PODIO_UTILITIES_DEPRECATED_H diff --git a/python/podio/EventStore.py b/python/podio/EventStore.py index c1a2df3d4..d2df33cbe 100644 --- a/python/podio/EventStore.py +++ b/python/podio/EventStore.py @@ -1,7 +1,10 @@ """Python EventStore for reading files with podio generated datamodels""" +import warnings +warnings.warn("The EventStore based I/O model is deprecated and will be removed. 
Switch to the Frame based model.", + FutureWarning) -from ROOT import gSystem +from ROOT import gSystem # pylint: disable=wrong-import-position gSystem.Load("libpodioPythonStore") # noqa: E402 from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position From 948896dc0d0006b15aa643d74715144e27f1c581 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 1 Mar 2023 10:34:57 +0100 Subject: [PATCH 043/100] Add `double` to the supported types of `GenericParameters` (#372) * Add double map to GenericParamters Add tests for constructors * Make python bindings work for double parameters again * Make sure to also print double values * Add test case for float value retrieval in python * Make sure to write double params in SIO, add legacy test case * Make SIO reading backward compatible * Fix typos and remove redundant test --- include/podio/GenericParameters.h | 30 +++++++++-- include/podio/SIOBlock.h | 4 +- python/podio/frame.py | 82 +++++++++++++++++-------------- python/podio/test_Frame.py | 6 ++- src/GenericParameters.cc | 12 +++-- src/SIOBlock.cc | 14 ++++-- src/selection.xml | 2 + tests/read_test.h | 9 ++++ tests/unittest.cpp | 58 ++++++++++++++++++++++ tests/write_frame.h | 3 ++ tests/write_test.h | 1 + 11 files changed, 168 insertions(+), 53 deletions(-) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index b2fd37b16..965bee334 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -17,7 +17,7 @@ namespace podio { /// The types which are supported in the GenericParameters -using SupportedGenericDataTypes = std::tuple; +using SupportedGenericDataTypes = std::tuple; /// Static bool for determining if a type T is a supported GenericParamter type template @@ -79,6 +79,7 @@ class GenericParameters { private: using IntMap = MapType; using FloatMap = MapType; + using DoubleMap = MapType; using StringMap = MapType; // need mutex pointers for having the possibility to copy/move 
GenericParameters using MutexPtr = std::unique_ptr; @@ -112,6 +113,11 @@ class GenericParameters { setValue(key, std::move(value)); } + /// Overlaod for catching initializer list setting of string vector values + void setValue(const std::string& key, std::vector values) { + setValue>(key, std::move(values)); + } + /// Overload for catching initializer list setting for vector values template >> void setValue(const std::string& key, std::initializer_list&& values) { @@ -223,6 +229,16 @@ class GenericParameters { return getMap(); } + /** + * Get the internal double map (necessary for serialization with SIO) + */ + const DoubleMap& getDoubleMap() const { + return getMap(); + } + DoubleMap& getDoubleMap() { + return getMap(); + } + /** * Get the internal string map (necessary for serialization with SIO) */ @@ -241,6 +257,8 @@ class GenericParameters { return _intMap; } else if constexpr (std::is_same_v, float>) { return _floatMap; + } else if constexpr (std::is_same_v, double>) { + return _doubleMap; } else { return _stringMap; } @@ -253,6 +271,8 @@ class GenericParameters { return _intMap; } else if constexpr (std::is_same_v, float>) { return _floatMap; + } else if constexpr (std::is_same_v, double>) { + return _doubleMap; } else { return _stringMap; } @@ -265,6 +285,8 @@ class GenericParameters { return *(m_intMtx.get()); } else if constexpr (std::is_same_v, float>) { return *(m_floatMtx.get()); + } else if constexpr (std::is_same_v, double>) { + return *(m_doubleMtx.get()); } else { return *(m_stringMtx.get()); } @@ -275,8 +297,10 @@ class GenericParameters { mutable MutexPtr m_intMtx{nullptr}; ///< The mutex guarding the integer map FloatMap _floatMap{}; ///< The map storing the float values mutable MutexPtr m_floatMtx{nullptr}; ///< The mutex guarding the float map - StringMap _stringMap{}; ///< The map storing the double values - mutable MutexPtr m_stringMtx{nullptr}; ///< The mutex guarding the float map + StringMap _stringMap{}; ///< The map storing the string 
values + mutable MutexPtr m_stringMtx{nullptr}; ///< The mutex guarding the string map + DoubleMap _doubleMap{}; ///< The map storing the double values + mutable MutexPtr m_doubleMtx{nullptr}; ///< The mutex guarding the double map }; template diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 123f8ef6a..95e3de27f 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -129,7 +129,7 @@ struct SIOVersionBlock : public sio::block { */ class SIOEventMetaDataBlock : public sio::block { public: - SIOEventMetaDataBlock() : sio::block("EventMetaData", sio::version::encode_version(0, 1)) { + SIOEventMetaDataBlock() : sio::block("EventMetaData", sio::version::encode_version(0, 2)) { } SIOEventMetaDataBlock(const SIOEventMetaDataBlock&) = delete; @@ -146,7 +146,7 @@ class SIOEventMetaDataBlock : public sio::block { */ class SIONumberedMetaDataBlock : public sio::block { public: - SIONumberedMetaDataBlock(const std::string& name) : sio::block(name, sio::version::encode_version(0, 1)) { + SIONumberedMetaDataBlock(const std::string& name) : sio::block(name, sio::version::encode_version(0, 2)) { } SIONumberedMetaDataBlock(const SIONumberedMetaDataBlock&) = delete; diff --git a/python/podio/frame.py b/python/podio/frame.py index 45ccf7194..70418451c 100644 --- a/python/podio/frame.py +++ b/python/podio/frame.py @@ -10,49 +10,52 @@ from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position -def _determine_supported_parameter_types(lang): +def _determine_supported_parameter_types(): """Determine the supported types for the parameters. - Args: - lang (str): Language for which the type names should be returned. Either - 'c++' or 'py'. 
- Returns: - tuple (str): the tuple with the string representation of all **c++** - classes that are supported + tuple(tuple(str, str)): the tuple with the string representation of all + c++ and their corresponding python types that are supported """ types_tuple = podio.SupportedGenericDataTypes() n_types = cppyy.gbl.std.tuple_size[podio.SupportedGenericDataTypes].value # Get the python types with the help of cppyy and the STL - py_types = (type(cppyy.gbl.std.get[i](types_tuple)).__name__ for i in range(n_types)) - if lang == 'py': - return tuple(py_types) - if lang == 'c++': - # Map of types that need special care when going from python to c++ - py_to_cpp_type_map = { - 'str': 'std::string' - } - # Convert them to the corresponding c++ types - return tuple(py_to_cpp_type_map.get(t, t) for t in py_types) + py_types = [type(cppyy.gbl.std.get[i](types_tuple)).__name__ for i in range(n_types)] + + def _determine_cpp_type(idx_and_type): + """Determine the actual c++ type from the python type name. + + Mainly maps 'str' to 'std::string', and also determines whether a python + 'float' is actually a 'double' or a 'float' in c++. 
The latter is necessary + since python only has float (corresponding to double in c++) and we + need the exact c++ type + """ + idx, typename = idx_and_type + if typename == 'float': + cpp_type = cppyy.gbl.std.tuple_element[idx, podio.SupportedGenericDataTypes].type + if cppyy.typeid(cpp_type).name() == 'd': + return 'double' + return 'float' + if typename == 'str': + return 'std::string' + return typename - raise ValueError(f"lang needs to be 'py' or 'c++' (got {lang})") + cpp_types = list(map(_determine_cpp_type, enumerate(py_types))) + return tuple(zip(cpp_types, py_types)) -SUPPORTED_PARAMETER_TYPES = _determine_supported_parameter_types('c++') -SUPPORTED_PARAMETER_PY_TYPES = _determine_supported_parameter_types('py') +SUPPORTED_PARAMETER_TYPES = _determine_supported_parameter_types() -# Map that is necessary for easier disambiguation of parameters that are -# available with more than one type under the same name. Maps a python type to -# a c++ vector of the corresponding type or a c++ type to the vector -_PY_TO_CPP_TYPE_MAP = { - pytype: f'std::vector<{cpptype}>' for (pytype, cpptype) in zip(SUPPORTED_PARAMETER_PY_TYPES, - SUPPORTED_PARAMETER_TYPES) - } -_PY_TO_CPP_TYPE_MAP.update({ - f'{cpptype}': f'std::vector<{cpptype}>' for cpptype in SUPPORTED_PARAMETER_TYPES - }) +def _get_cpp_vector_types(type_str): + """Get the possible std::vector from the passed py_type string.""" + # Gather a list of all types that match the type_str (c++ or python) + types = list(filter(lambda t: type_str in t, SUPPORTED_PARAMETER_TYPES)) + if not types: + raise ValueError(f'{type_str} cannot be mapped to a valid parameter type') + + return [f'std::vector<{t}>' for t in map(lambda x: x[0], types)] class Frame: @@ -130,7 +133,6 @@ def get_parameter(self, name, as_type=None): ValueError: If there are multiple parameters with the same name, but multiple types and no type specifier to disambiguate between them has been passed. 
- """ def _get_param_value(par_type, name): par_value = self._frame.getParameter[par_type](name) @@ -148,14 +150,18 @@ def _get_param_value(par_type, name): raise ValueError(f'{name} parameter has {len(par_type)} different types available, ' 'but no as_type argument to disambiguate') - req_type = _PY_TO_CPP_TYPE_MAP.get(as_type, None) - if req_type is None: - raise ValueError(f'as_type value {as_type} cannot be mapped to a valid parameter type') - - if req_type not in par_type: + # Get all possible c++ vector types and see if we can unambiguously map them + # to the available types for this parameter + vec_types = _get_cpp_vector_types(as_type) + vec_types = [t for t in vec_types if t in par_type] + if len(vec_types) == 0: raise ValueError(f'{name} parameter is not available as type {as_type}') - return _get_param_value(req_type, name) + if len(vec_types) > 1: + raise ValueError(f'{name} parameter cannot be unambiguously mapped to a c++ type with ' + f'{as_type=}. Consider passing in the c++ type instead of the python type') + + return _get_param_value(vec_types[0], name) def get_parameters(self): """Get the complete podio::GenericParameters object stored in this Frame. 
@@ -202,7 +208,7 @@ def _init_param_keys(self): """ params = self._frame.getParameters() keys_dict = {} - for par_type in SUPPORTED_PARAMETER_TYPES: + for par_type, _ in SUPPORTED_PARAMETER_TYPES: keys = params.getKeys[par_type]() for key in keys: # Make sure to convert to a python string here to not have a dangling diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index 31e510ba0..8f4337c4f 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -19,7 +19,7 @@ EXPECTED_EXTENSION_COLL_NAMES = {"extension_Contained", "extension_ExternalComponent", "extension_ExternalRelation"} # The expected parameter names in each frame -EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData'} +EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData', 'SomeValue'} class FrameTest(unittest.TestCase): @@ -93,7 +93,9 @@ def test_frame_parameters(self): with self.assertRaises(ValueError): # Parameter not available as float (only int and string) - _ = self.event.get_parameter('SomeVectorData', as_type='float') + _ = self.event.get_parameter('SomeValue', as_type='float') self.assertEqual(self.event.get_parameter('SomeVectorData', as_type='int'), [1, 2, 3, 4]) self.assertEqual(self.event.get_parameter('SomeVectorData', as_type='str'), ["just", "some", "strings"]) + # as_type='float' will also retrieve double values (if the name is unambiguous) + self.assertEqual(self.event.get_parameter('SomeVectorData', as_type='float'), [0.0, 0.0]) diff --git a/src/GenericParameters.cc b/src/GenericParameters.cc index c1597d16f..1307d5335 100644 --- a/src/GenericParameters.cc +++ b/src/GenericParameters.cc @@ -8,23 +8,27 @@ namespace podio { GenericParameters::GenericParameters() : m_intMtx(std::make_unique()), m_floatMtx(std::make_unique()), - m_stringMtx(std::make_unique()) { + m_stringMtx(std::make_unique()), + m_doubleMtx(std::make_unique()) { } GenericParameters::GenericParameters(const 
GenericParameters& other) : m_intMtx(std::make_unique()), m_floatMtx(std::make_unique()), - m_stringMtx(std::make_unique()) { + m_stringMtx(std::make_unique()), + m_doubleMtx(std::make_unique()) { { // acquire all three locks at once to make sure all three internal maps are // copied at the same "state" of the GenericParameters auto& intMtx = other.getMutex(); auto& floatMtx = other.getMutex(); auto& stringMtx = other.getMutex(); - std::scoped_lock lock(intMtx, floatMtx, stringMtx); + auto& doubleMtx = other.getMutex(); + std::scoped_lock lock(intMtx, floatMtx, stringMtx, doubleMtx); _intMap = other._intMap; _floatMap = other._floatMap; _stringMap = other._stringMap; + _doubleMap = other._doubleMap; } } @@ -134,6 +138,8 @@ void GenericParameters::print(std::ostream& os, bool flush) { printMap(getMap(), os); os << "\nfloat parameters\n"; printMap(getMap(), os); + os << "\ndouble parameters\n"; + printMap(getMap(), os); os << "\nstd::string parameters\n"; printMap(getMap(), os); diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 97877adc0..6c7a95ba6 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -75,30 +75,34 @@ void writeGenericParameters(sio::write_device& device, const GenericParameters& writeParamMap(device, params.getIntMap()); writeParamMap(device, params.getFloatMap()); writeParamMap(device, params.getStringMap()); + writeParamMap(device, params.getDoubleMap()); } -void readGenericParameters(sio::read_device& device, GenericParameters& params) { +void readGenericParameters(sio::read_device& device, GenericParameters& params, sio::version_type version) { readParamMap(device, params.getIntMap()); readParamMap(device, params.getFloatMap()); readParamMap(device, params.getStringMap()); + if (version >= sio::version::encode_version(0, 2)) { + readParamMap(device, params.getDoubleMap()); + } } -void SIOEventMetaDataBlock::read(sio::read_device& device, sio::version_type) { - readGenericParameters(device, *metadata); +void 
SIOEventMetaDataBlock::read(sio::read_device& device, sio::version_type version) { + readGenericParameters(device, *metadata, version); } void SIOEventMetaDataBlock::write(sio::write_device& device) { writeGenericParameters(device, *metadata); } -void SIONumberedMetaDataBlock::read(sio::read_device& device, sio::version_type) { +void SIONumberedMetaDataBlock::read(sio::read_device& device, sio::version_type version) { int size; device.data(size); while (size--) { int id; device.data(id); GenericParameters params; - readGenericParameters(device, params); + readGenericParameters(device, params, version); data->emplace(id, std::move(params)); } diff --git a/src/selection.xml b/src/selection.xml index d2c0d9adb..3c0be36e3 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -3,6 +3,7 @@ + @@ -10,6 +11,7 @@ + diff --git a/tests/read_test.h b/tests/read_test.h index 9cc677aaf..6382cb20a 100644 --- a/tests/read_test.h +++ b/tests/read_test.h @@ -90,6 +90,15 @@ void processEvent(StoreT& store, int eventNum, podio::version::Version fileVersi } } + if constexpr (!isEventStore) { + if (fileVersion > podio::version::Version{0, 16, 2}) { + const auto doubleParams = store.template getParameter>("SomeVectorData"); + if (doubleParams.size() != 2 || doubleParams[0] != eventNum * 1.1 || doubleParams[1] != eventNum * 2.2) { + throw std::runtime_error("Could not read event parameter: 'SomeDoubleValues' correctly"); + } + } + } + try { // not assigning to a variable, because it will remain unused, we just want // the exception here diff --git a/tests/unittest.cpp b/tests/unittest.cpp index dd93cced6..e16b2eb9c 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -936,6 +936,22 @@ TEST_CASE("GenericParameters", "[generic-parameters]") { gp.setValue("aString", "const char initialized"); REQUIRE(gp.getValue("aString") == "const char initialized"); + gp.setValue("aStringVec", {"init", "from", "const", "chars"}); + const auto& stringVec = gp.getValue>("aStringVec"); + 
REQUIRE(stringVec.size() == 4); + REQUIRE(stringVec[0] == "init"); + REQUIRE(stringVec[3] == "chars"); + + // Check that storing double values works + gp.setValue("double", 1.234); + gp.setValue("manyDoubles", {1.23, 4.56, 7.89}); + REQUIRE(gp.getValue("double") == 1.234); + const auto& storedDoubles = gp.getValue>("manyDoubles"); + REQUIRE(storedDoubles.size() == 3); + REQUIRE(storedDoubles[0] == 1.23); + REQUIRE(storedDoubles[1] == 4.56); + REQUIRE(storedDoubles[2] == 7.89); + // Check that passing an initializer_list creates the vector on the fly gp.setValue("manyInts", {1, 2, 3, 4}); const auto& ints = gp.getValue>("manyInts"); @@ -973,6 +989,48 @@ TEST_CASE("GenericParameters", "[generic-parameters]") { REQUIRE(gp.getValue>("MissingValue").empty()); } +TEST_CASE("GenericParameters constructors", "[generic-parameters]") { + // Tests for making sure that generic parameters can be moved / copied correctly + auto originalParams = podio::GenericParameters{}; + originalParams.setValue("int", 42); + originalParams.setValue("ints", {1, 2}); + originalParams.setValue("float", 3.14f); + originalParams.setValue("double", 2 * 3.14); + originalParams.setValue("strings", {"one", "two", "three"}); + + SECTION("Copy constructor") { + auto copiedParams = originalParams; + REQUIRE(copiedParams.getValue("int") == 42); + REQUIRE(copiedParams.getValue>("ints")[1] == 2); + REQUIRE(copiedParams.getValue("float") == 3.14f); + REQUIRE(copiedParams.getValue("double") == 2 * 3.14); + REQUIRE(copiedParams.getValue>("strings")[0] == "one"); + + // Make sure these are truly independent copies now + copiedParams.setValue("anotherDouble", 1.2345); + REQUIRE(originalParams.getValue("anotherDouble") == double{}); + } + + SECTION("Move constructor") { + auto copiedParams = std::move(originalParams); + REQUIRE(copiedParams.getValue("int") == 42); + REQUIRE(copiedParams.getValue>("ints")[1] == 2); + REQUIRE(copiedParams.getValue("float") == 3.14f); + REQUIRE(copiedParams.getValue("double") == 2 * 
3.14); + REQUIRE(copiedParams.getValue>("strings")[0] == "one"); + } + + SECTION("Move assignment") { + auto copiedParams = podio::GenericParameters{}; + copiedParams = std::move(originalParams); + REQUIRE(copiedParams.getValue("int") == 42); + REQUIRE(copiedParams.getValue>("ints")[1] == 2); + REQUIRE(copiedParams.getValue("float") == 3.14f); + REQUIRE(copiedParams.getValue("double") == 2 * 3.14); + REQUIRE(copiedParams.getValue>("strings")[0] == "one"); + } +} + // Helper alias template "macro" to get the return type of calling // GenericParameters::getValue with the desired template type template diff --git a/tests/write_frame.h b/tests/write_frame.h index 85b50d7e3..891a029d5 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -420,6 +420,9 @@ podio::Frame makeFrame(int iFrame) { frame.putParameter("UserEventName", " event_number_" + std::to_string(iFrame)); frame.putParameter("SomeVectorData", {1, 2, 3, 4}); frame.putParameter("SomeVectorData", {"just", "some", "strings"}); + frame.putParameter("SomeVectorData", {iFrame * 1.1, iFrame * 2.2}); + frame.putParameter("SomeValue", "string value"); + frame.putParameter("SomeValue", 42); // An empty collection frame.put(ExampleClusterCollection(), "emptyCollection"); diff --git a/tests/write_test.h b/tests/write_test.h index 083657787..a43974099 100644 --- a/tests/write_test.h +++ b/tests/write_test.h @@ -87,6 +87,7 @@ void write(podio::EventStore& store, WriterT& writer) { ss << " event_number_" << i; evtMD.setValue("UserEventName", ss.str()); evtMD.setValue("SomeVectorData", {1, 2, 3, 4}); + evtMD.setValue("SomeVectorData", {i * 1.1, i * 2.2}); auto& colMD = store.getCollectionMetaData(hits.getID()); colMD.setValue("CellIDEncodingString", "system:8,barrel:3,layer:6,slice:5,x:-16,y:-16"); From ada84438744359f2b238c35e5e12b014a87fd210 Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Mon, 6 Mar 2023 02:50:48 -0500 Subject: [PATCH 044/100] Add a conversion operator to std::string for 
podio::version::Version (#384) --- podioVersion.in.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/podioVersion.in.h b/podioVersion.in.h index 717cce17a..aae468b6a 100644 --- a/podioVersion.in.h +++ b/podioVersion.in.h @@ -2,6 +2,7 @@ #define PODIO_PODIOVERSION_H #include +#include #include #include #if __cplusplus >= 202002L @@ -61,6 +62,12 @@ struct Version { #undef DEFINE_COMP_OPERATOR #endif + explicit operator std::string() const { + std::stringstream ss; + ss << *this; + return ss.str(); + }; + friend std::ostream& operator<<(std::ostream&, const Version& v); }; From dc9b6bad7e5e3d499cfa900cbe74ea2aa4c85bab Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 7 Mar 2023 09:16:04 +0100 Subject: [PATCH 045/100] Store the model definition into files that are written (#358) * Split model reading into file reading and parsing * Add possibility to dump parsed EDMs to JSON * Add registry for datamodel JSON defintions - Generate model definitions in JSON format as constexpr string literals - Register constexpr string literals in registry * Populate definition registry via static variable initialization - Also make the collections query-able * Write definition and read it back afterwards (ROOT) * Read and write EDM definitions in SIO * Refactor EDM defintion I/O functionality for less code duplication * Add first version of model dumping * Make dumped models look more like inputs - Change order of main keys (options, components, datatypes) - Slightly tweak formatting (as far as possible with PyYAML) * Add roundtrip tests for stored EDM definitions * Add documentation for EDM definition embedding * Fix documentation * Add warnings output when trying to retrieve non existant EDMs * Rename EDMDefinitionRegistry and clarify documentation * Fix python bindings and rename tests * Make utility classes members instead of using them as mixin * Rename header file to match new terminology * Update documentation to match implementation again * Update test names 
also in ignored tests list --- doc/advanced_topics.md | 123 ++++++++++++++++++ include/podio/CollectionBase.h | 3 + include/podio/DatamodelRegistry.h | 99 ++++++++++++++ include/podio/ROOTFrameReader.h | 12 ++ include/podio/ROOTFrameWriter.h | 3 + include/podio/SIOBlock.h | 59 +++++++++ include/podio/SIOFrameReader.h | 15 +++ include/podio/SIOFrameWriter.h | 2 + include/podio/UserDataCollection.h | 5 + .../utilities/DatamodelRegistryIOHelpers.h | 76 +++++++++++ include/podio/utilities/TypeHelpers.h | 58 +++++++++ python/podio/base_reader.py | 25 ++++ python/podio/generator_utils.py | 31 ++++- python/podio/podio_config_reader.py | 27 ++-- python/podio/test_DataModelJSONEncoder.py | 71 ++++++++++ python/podio_class_generator.py | 19 ++- python/templates/CMakeLists.txt | 1 + python/templates/Collection.cc.jinja2 | 5 + python/templates/Collection.h.jinja2 | 2 + python/templates/DatamodelDefinition.h.jinja2 | 30 +++++ src/CMakeLists.txt | 7 +- src/DatamodelRegistry.cc | 63 +++++++++ src/DatamodelRegistryIOHelpers.cc | 49 +++++++ src/ROOTFrameReader.cc | 10 +- src/ROOTFrameWriter.cc | 5 + src/SIOBlock.cc | 40 ++---- src/SIOFrameReader.cc | 28 +++- src/SIOFrameWriter.cc | 9 ++ src/rootUtils.h | 7 + src/selection.xml | 1 + src/sioUtils.h | 1 + tests/CMakeLists.txt | 46 ++++++- tests/CTestCustom.cmake | 5 + tests/scripts/dumpModelRoundTrip.sh | 36 +++++ tests/{ => scripts}/get_test_inputs.sh | 0 tools/podio-dump | 29 ++++- 36 files changed, 951 insertions(+), 51 deletions(-) create mode 100644 include/podio/DatamodelRegistry.h create mode 100644 include/podio/utilities/DatamodelRegistryIOHelpers.h create mode 100644 python/podio/test_DataModelJSONEncoder.py create mode 100644 python/templates/DatamodelDefinition.h.jinja2 create mode 100644 src/DatamodelRegistry.cc create mode 100644 src/DatamodelRegistryIOHelpers.cc create mode 100755 tests/scripts/dumpModelRoundTrip.sh rename tests/{ => scripts}/get_test_inputs.sh (100%) diff --git a/doc/advanced_topics.md 
b/doc/advanced_topics.md index 1ebdcad28..c2235f565 100644 --- a/doc/advanced_topics.md +++ b/doc/advanced_topics.md @@ -110,3 +110,126 @@ To implement your own transient event store, the only requirement is to set the - Run pre-commit manually `$ pre-commit run --all-files` + +## Retrieving the EDM definition from a data file +It is possible to get the EDM definition(s) that were used to generate the +datatypes that are stored in a data file. This makes it possible to re-generate +the necessary code and build all libraries again in case they are not easily +available otherwise. To see which EDM definitions are available in a data file +use the `podio-dump` utility + +```bash +podio-dump <data-file> +``` +which will give an (exemplary) output like this +``` +input file: <data-file> + +EDM model definitions stored in this file: edm4hep + +[...] +``` + +To actually dump the model definition to stdout use the `--dump-edm` option +and the name of the datamodel you want to dump: + +```bash +podio-dump --dump-edm edm4hep <data-file> > dumped_edm4hep.yaml +``` + +Here we directly redirected the output to a yaml file that can then again be +used by the `podio_class_generator.py` to generate the corresponding c++ code +(or be passed to the cmake macros). + +**Note that the dumped EDM definition is equivalent but not necessarily exactly +the same as the original EDM definition.** E.g. all the datatypes will have all +their fields (`Members`, `OneToOneRelations`, `OneToManyRelations`, +`VectorMembers`) defined, and defaulted to empty lists in case they were not +present in the original EDM definition. The reason for this is that the embedded +EDM definition is the pre-processed and validated one [as described +below](#technical-details-on-edm-definition-embedding). + +### Accessing the EDM definition programmatically +The EDM definition can also be accessed programmatically via the +`[ROOT|SIO]FrameReader::getDatamodelDefinition` method.
It takes an EDM name as its +single argument and returns the EDM definition as a JSON string. Most likely +this has to be decoded into an actual JSON structure in order to be usable (e.g. +via `json.loads` in python to get a `dict`). + +### Technical details on EDM definition embedding +The EDM definition is embedded into the core EDM library as a raw string literal +in JSON format. This string is generated into the `DatamodelDefinition.h` file as + +```cpp +namespace <package_name>::meta { +static constexpr auto <package_name>__JSONDefinition = R"DATAMODELDEF(<json encoded definition>)DATAMODELDEF"; +} +``` + +where `<package_name>` is the name of the EDM as passed to the +`podio_class_generator.py` (or the cmake macro). The `<json encoded definition>` +is obtained from the pre-processed EDM definition that is read from the yaml +file. During this pre-processing the EDM definition is validated, and optional +fields are filled with empty defaults. Additionally, the `includeSubfolder` +option will be populated with the actual include subfolder, in case it has been +set to `True` in the yaml file. Since the json encoded definition is generated +right before the pre-processed model is passed to the class generator, this +definition is equivalent, but not necessarily equal to the original definition. + +#### The `DatamodelRegistry` +To make access to information about currently loaded and available datamodels a +bit easier the `DatamodelRegistry` (singleton) keeps a map of all loaded +datamodels and provides access to this information. In this context we +refer to an *EDM* as the shared library (and the corresponding public headers) +that have been compiled from code that has been generated from a *datamodel +definition* in the original YAML file. In general whenever we refer to a +*datamodel* in this context we mean the entity as a whole, i.e. its definition +in a YAML file, the concrete implementation as an EDM, as well as other +information that is related to it.
+ +Currently the `DatamodelRegistry` mainly provides access to the original +definition of available datamodels via two methods: +```cpp +const std::string_view getDatamodelDefinition(std::string_view name) const; + +const std::string_view getDatamodelDefinition(size_t index) const; +``` + +where `index` can be obtained from each collection via +`getDatamodelRegistryIndex`. That in turn simply calls +`<package_name>::meta::DatamodelRegistryIndex::value()`, another singleton-like object +that takes care of registering an EDM definition to the `DatamodelRegistry` +during its static initialization. It is also defined in the +`DatamodelDefinition.h` header. + +Since the datamodel definition is embedded as a raw string literal into the core +EDM shared library, it is in principle also relatively straightforward to +retrieve it from this library by inspecting the binary, e.g. via +```bash +readelf -p .rodata libedm4hep.so | grep options +``` + +which will result in something like + +``` + [ 300] {"options": {"getSyntax": true, "exposePODMembers": false, "includeSubfolder": "edm4hep/"}, "components": {<...>}, "datatypes": {<...>}} +``` + +#### I/O helpers for EDM definition storing +The `podio/utilities/DatamodelRegistryIOHelpers.h` header defines two utility +classes that help with instrumenting readers and writers with functionality to +read and write all the necessary EDM definitions. + +- The `DatamodelDefinitionCollector` is intended for usage in writers. It + essentially collects the datamodel definitions of all the collections it encounters. + The `registerDatamodelDefinition` method it provides should be called with every collection + that is written. The `getDatamodelDefinitionsToWrite` method returns a vector of all + datamodel names and their definition that were encountered during writing. **It is + then the writer's responsibility to actually store this information into the + file**. +- The `DatamodelDefinitionHolder` is intended to be used by readers.
It + provides the `getDatamodelDefinition` and `getAvailableDatamodels` methods. + **It is again the reader's responsibility to correctly populate it with the data it + has read from file.** Currently the `SIOFrameReader` and the `ROOTFrameReader` + use it and also offer the same functionality as public methods with the help + of it. diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index 670291ba3..fcb81401a 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -76,6 +76,9 @@ class CollectionBase { /// print this collection to the passed stream virtual void print(std::ostream& os = std::cout, bool flush = true) const = 0; + + /// Get the index in the DatamodelRegistry of the EDM this collection belongs to + virtual size_t getDatamodelRegistryIndex() const = 0; }; } // namespace podio diff --git a/include/podio/DatamodelRegistry.h b/include/podio/DatamodelRegistry.h new file mode 100644 index 000000000..a32aa8218 --- /dev/null +++ b/include/podio/DatamodelRegistry.h @@ -0,0 +1,99 @@ +#ifndef PODIO_DATAMODELREGISTRY_H +#define PODIO_DATAMODELREGISTRY_H + +#include +#include +#include +#include + +namespace podio { + +/** + * Global registry holding information about datamodels and datatypes defined + * therein that are currently known by podio (i.e. which have been dynamically + * loaded). + * + * This is a singleton which is (statically) populated during dynamic loading of + * generated EDMs. In this context an **EDM refers to the shared library** that + * is compiled from the generated code from a datamodel definition in YAML + * format. When we refer to a **datamodel** in this context we talk about the + * entity as a whole, i.e. its definition in a YAML file, but also the concrete + * implementation as an EDM, as well as all other information that is related to + * it. In the API of this registry this will be used, unless we want to + * highlight that we are referring to a specific part of a datamodel.
+ */ +class DatamodelRegistry { +public: + /// Get the registry + static const DatamodelRegistry& instance(); + + // Mutable instance only used for the initial registration! + static DatamodelRegistry& mutInstance(); + + ~DatamodelRegistry() = default; + DatamodelRegistry(const DatamodelRegistry&) = delete; + DatamodelRegistry& operator=(const DatamodelRegistry&) = delete; + DatamodelRegistry(DatamodelRegistry&&) = delete; + DatamodelRegistry& operator=(const DatamodelRegistry&&) = delete; + + /// Dedicated index value for collections that don't have a datamodel + /// definition (e.g. UserDataCollection) + static constexpr size_t NoDefinitionNecessary = -1; + /// Dedicated index value for error checking, used to default init the generated RegistryIndex + static constexpr size_t NoDefinitionAvailable = -2; + + /** + * Get the definition (in JSON format) of the datamodel with the given + * edmName. + * + * If no datamodel with the given name can be found, an empty datamodel + * definition, i.e. an empty JSON object ("{}"), is returned. + * + * @param name The name of the datamodel + */ + const std::string_view getDatamodelDefinition(std::string_view name) const; + + /** + * Get the defintion (in JSON format) of the datamodel wth the given index. + * + * If no datamodel is found under the given index, an empty datamodel + * definition, i.e. an empty JSON object ("{}"), is returned. + * + * @param index The datamodel definition index that can be obtained from each + * collection + */ + const std::string_view getDatamodelDefinition(size_t index) const; + + /** + * Get the name of the datamodel that is stored under the given index. + * + * If no datamodel is found under the given index, an empty string is returned + * + * @param index The datamodel definition index that can be obtained from each + * collection + */ + const std::string& getDatamodelName(size_t index) const; + + /** + * Register a datamodel return the index in the registry. 
+ * + * This is the hook that is called during dynamic loading of an EDM to + * register information for this EDM. If an EDM has already been registered + * under this name, than the index to the existing EDM in the registry will be + * returned. + * + * @param name The name of the EDM that should be registered + * @param definition The datamodel definition from which this EDM has been + * generated in JSON format + * + */ + size_t registerDatamodel(std::string name, std::string_view definition); + +private: + DatamodelRegistry() = default; + /// The stored definitions + std::vector> m_definitions{}; +}; +} // namespace podio + +#endif // PODIO_DATAMODELREGISTRY_H diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index 1850a0b02..1a2f48a4d 100644 --- a/include/podio/ROOTFrameReader.h +++ b/include/podio/ROOTFrameReader.h @@ -4,6 +4,7 @@ #include "podio/CollectionBranches.h" #include "podio/ROOTFrameData.h" #include "podio/podioVersion.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "TChain.h" @@ -79,6 +80,16 @@ class ROOTFrameReader { /// Get the names of all the availalable Frame categories in the current file(s) std::vector getAvailableCategories() const; + /// Get the datamodel definition for the given name + const std::string_view getDatamodelDefinition(const std::string& name) const { + return m_datamodelHolder.getDatamodelDefinition(name); + } + + /// Get all names of the datamodels that ara available from this reader + std::vector getAvailableDatamodels() const { + return m_datamodelHolder.getAvailableDatamodels(); + } + private: /** * Helper struct to group together all the necessary state to read / process a @@ -132,6 +143,7 @@ class ROOTFrameReader { std::vector m_availCategories{}; ///< All available categories from this file podio::version::Version m_fileVersion{0, 0, 0}; + DatamodelDefinitionHolder m_datamodelHolder{}; }; } // namespace podio diff --git a/include/podio/ROOTFrameWriter.h 
b/include/podio/ROOTFrameWriter.h index 9428ed929..2546613d8 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -3,6 +3,7 @@ #include "podio/CollectionBranches.h" #include "podio/CollectionIDTable.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include "TFile.h" @@ -80,6 +81,8 @@ class ROOTFrameWriter { std::unique_ptr m_file{nullptr}; ///< The storage file std::unordered_map m_categories{}; ///< All categories + + DatamodelDefinitionCollector m_datamodelCollector{}; }; } // namespace podio diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 95e3de27f..3e02561b8 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -16,6 +17,7 @@ #include #include #include +#include namespace podio { @@ -26,6 +28,34 @@ void handlePODDataSIO(devT& device, PODData* data, size_t size) { device.data(dataPtr, count); } +/// Write anything that iterates like an std::map +template +void writeMapLike(sio::write_device& device, const MapLikeT& map) { + device.data((int)map.size()); + for (const auto& [key, value] : map) { + device.data(key); + device.data(value); + } +} + +/// Read anything that iterates like an std::map +template +void readMapLike(sio::read_device& device, MapLikeT& map) { + int size; + device.data(size); + while (size--) { + detail::GetKeyType key; + device.data(key); + detail::GetMappedType value; + device.data(value); + if constexpr (podio::detail::isVector) { + map.emplace_back(std::move(key), std::move(value)); + } else { + map.emplace(std::move(key), std::move(value)); + } + } +} + /// Base class for sio::block handlers used with PODIO class SIOBlock : public sio::block { @@ -141,6 +171,32 @@ class SIOEventMetaDataBlock : public sio::block { podio::GenericParameters* metadata{nullptr}; }; +/** + * A block to serialize anything that behaves similar in iterating as a + * map, e.g. 
vector>, which is what is used + * internally to represent the data to be written. + */ +template +struct SIOMapBlock : public sio::block { + SIOMapBlock() : sio::block("SIOMapBlock", sio::version::encode_version(0, 1)) { + } + SIOMapBlock(std::vector>&& data) : + sio::block("SIOMapBlock", sio::version::encode_version(0, 1)), mapData(std::move(data)) { + } + + SIOMapBlock(const SIOMapBlock&) = delete; + SIOMapBlock& operator=(const SIOMapBlock&) = delete; + + void read(sio::read_device& device, sio::version_type) override { + readMapLike(device, mapData); + } + void write(sio::write_device& device) override { + writeMapLike(device, mapData); + } + + std::vector> mapData{}; +}; + /** * A block for handling the run and collection meta data */ @@ -219,6 +275,9 @@ namespace sio_helpers { /// The name of the TOCRecord static constexpr const char* SIOTocRecordName = "podio_SIO_TOC_Record"; + /// The name of the record containing the EDM definitions in json format + static constexpr const char* SIOEDMDefinitionName = "podio_SIO_EDMDefinitions"; + // should hopefully be enough for all practical purposes using position_type = uint32_t; } // namespace sio_helpers diff --git a/include/podio/SIOFrameReader.h b/include/podio/SIOFrameReader.h index d7a2c5e8c..5fefdab75 100644 --- a/include/podio/SIOFrameReader.h +++ b/include/podio/SIOFrameReader.h @@ -4,6 +4,7 @@ #include "podio/SIOBlock.h" #include "podio/SIOFrameData.h" #include "podio/podioVersion.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include @@ -53,12 +54,24 @@ class SIOFrameReader { /// Get the names of all the availalable Frame categories in the current file(s) std::vector getAvailableCategories() const; + /// Get the datamodel definition for the given name + const std::string_view getDatamodelDefinition(const std::string& name) const { + return m_datamodelHolder.getDatamodelDefinition(name); + } + + /// Get all names of the datamodels that ara available from this reader + std::vector 
getAvailableDatamodels() const { + return m_datamodelHolder.getAvailableDatamodels(); + } + private: void readPodioHeader(); /// read the TOC record bool readFileTOCRecord(); + void readEDMDefinitions(); + sio::ifstream m_stream{}; ///< The stream from which we read /// Count how many times each an entry of this name has been read already @@ -68,6 +81,8 @@ class SIOFrameReader { SIOFileTOCRecord m_tocRecord{}; /// The podio version that has been used to write the file podio::version::Version m_fileVersion{0}; + + DatamodelDefinitionHolder m_datamodelHolder{}; }; } // namespace podio diff --git a/include/podio/SIOFrameWriter.h b/include/podio/SIOFrameWriter.h index 1ccc7a2e8..a8a7d084f 100644 --- a/include/podio/SIOFrameWriter.h +++ b/include/podio/SIOFrameWriter.h @@ -2,6 +2,7 @@ #define PODIO_SIOFRAMEWRITER_H #include "podio/SIOBlock.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" #include @@ -35,6 +36,7 @@ class SIOFrameWriter { private: sio::ofstream m_stream{}; ///< The output file stream SIOFileTOCRecord m_tocRecord{}; ///< The "table of contents" of the written file + DatamodelDefinitionCollector m_datamodelCollector{}; }; } // namespace podio diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 2365c5094..7d28e2c99 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -3,6 +3,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionBuffers.h" +#include "podio/DatamodelRegistry.h" #include "podio/utilities/TypeHelpers.h" #include @@ -172,6 +173,10 @@ class UserDataCollection : public CollectionBase { } } + size_t getDatamodelRegistryIndex() const override { + return DatamodelRegistry::NoDefinitionNecessary; + } + // ----- some wrapers for std::vector and access to the complete std::vector (if really needed) typename std::vector::iterator begin() { diff --git a/include/podio/utilities/DatamodelRegistryIOHelpers.h 
b/include/podio/utilities/DatamodelRegistryIOHelpers.h new file mode 100644 index 000000000..4ca996ae6 --- /dev/null +++ b/include/podio/utilities/DatamodelRegistryIOHelpers.h @@ -0,0 +1,76 @@ +#ifndef PODIO_UTILITIES_DATAMODELREGISTRYIOHELPERS_H +#define PODIO_UTILITIES_DATAMODELREGISTRYIOHELPERS_H + +#include "podio/CollectionBase.h" +#include "podio/DatamodelRegistry.h" + +#include +#include +#include +#include + +namespace podio { + +/** + * Helper class to collect the datamodel (JSON) definitions that should be + * written. + */ +class DatamodelDefinitionCollector { +public: + /** + * Register the datamodel definition of the EDM this collection is from to be + * written. + * + * @param coll A collection of an EDM + * @param name The name under which this collection is stored on file + */ + void registerDatamodelDefinition(const podio::CollectionBase* coll, const std::string& name); + + /// Get all the names and JSON definitions that need to be written + std::vector> getDatamodelDefinitionsToWrite() const; + +private: + std::set m_edmDefRegistryIdcs{}; ///< The indices in the EDM definition registry that need to be written +}; + +/** + * Helper class to hold and provide the datamodel (JSON) definitions for reader + * classes. 
+ */ +class DatamodelDefinitionHolder { +public: + /// The "map" type that is used internally + using MapType = std::vector>; + /// Constructor from an existing collection of names and datamodel definitions + DatamodelDefinitionHolder(MapType&& definitions) : m_availEDMDefs(std::move(definitions)) { + } + + DatamodelDefinitionHolder() = default; + ~DatamodelDefinitionHolder() = default; + DatamodelDefinitionHolder(const DatamodelDefinitionHolder&) = delete; + DatamodelDefinitionHolder& operator=(const DatamodelDefinitionHolder&) = delete; + DatamodelDefinitionHolder(DatamodelDefinitionHolder&&) = default; + DatamodelDefinitionHolder& operator=(DatamodelDefinitionHolder&&) = default; + + /** + * Get the datamodel definition for the given datamodel name. + * + * Returns an empty model definition if no model is stored under the given + * name. + * + * @param name The name of the datamodel + */ + const std::string_view getDatamodelDefinition(const std::string& name) const; + + /** + * Get all names of the datamodels that have been read from file + */ + std::vector getAvailableDatamodels() const; + +protected: + MapType m_availEDMDefs{}; +}; + +} // namespace podio + +#endif // PODIO_UTILITIES_DATAMODELREGISTRYIOHELPERS_H diff --git a/include/podio/utilities/TypeHelpers.h b/include/podio/utilities/TypeHelpers.h index b351e2118..74d1a4d28 100644 --- a/include/podio/utilities/TypeHelpers.h +++ b/include/podio/utilities/TypeHelpers.h @@ -1,9 +1,11 @@ #ifndef PODIO_UTILITIES_TYPEHELPERS_H #define PODIO_UTILITIES_TYPEHELPERS_H +#include #include #include #include +#include #include namespace podio { @@ -100,6 +102,62 @@ namespace detail { template static constexpr bool isVector = IsVectorHelper::value; + /** + * Helper struct to detect whether a type is a std::map or std::unordered_map + */ + template + struct IsMapHelper : std::false_type {}; + + template + struct IsMapHelper> : std::true_type {}; + + template + struct IsMapHelper> : std::true_type {}; + + /** + * Alias 
template for deciding whether the passed type T is a map or + * unordered_map + */ + template + static constexpr bool isMap = IsMapHelper::value; + + /** + * Helper struct to homogenize the (type) access for things that behave like + * maps, e.g. vectors of pairs (and obviously maps). + * + * NOTE: This is not SFINAE friendly. + */ + template >, + typename IsVector = std::bool_constant && (std::tuple_size() == 2)>> + struct MapLikeTypeHelper {}; + + /** + * Specialization for actual maps + */ + template + struct MapLikeTypeHelper, std::bool_constant> { + using key_type = typename T::key_type; + using mapped_type = typename T::mapped_type; + }; + + /** + * Specialization for vector of pairs / tuples (of size 2) + */ + template + struct MapLikeTypeHelper, std::bool_constant> { + using key_type = typename std::tuple_element<0, typename T::value_type>::type; + using mapped_type = typename std::tuple_element<1, typename T::value_type>::type; + }; + + /** + * Type aliases for easier usage in actual code + */ + template + using GetKeyType = typename MapLikeTypeHelper::key_type; + + template + using GetMappedType = typename MapLikeTypeHelper::mapped_type; + } // namespace detail // forward declaration to be able to use it below diff --git a/python/podio/base_reader.py b/python/podio/base_reader.py index b45cfa3f1..88d3acc3e 100644 --- a/python/podio/base_reader.py +++ b/python/podio/base_reader.py @@ -55,3 +55,28 @@ def is_legacy(self): bool: True if this is a legacy file reader """ return self._is_legacy + + @property + def datamodel_definitions(self): + """Get the available datamodel definitions from this reader. + + Returns: + tuple(str): The names of the available datamodel definitions + """ + if self._is_legacy: + return () + return tuple(n.c_str() for n in self._reader.getAvailableDatamodels()) + + def get_datamodel_definition(self, edm_name): + """Get the datamodel definition as JSON string. 
+ + Args: + str: The name of the datamodel + + Returns: + str: The complete model definition in JSON format. Use, e.g. json.loads + to convert it into a python dictionary. + """ + if self._is_legacy: + return "" + return self._reader.getDatamodelDefinition(edm_name).data() diff --git a/python/podio/generator_utils.py b/python/podio/generator_utils.py index 9711dcdf1..e50b139e7 100644 --- a/python/podio/generator_utils.py +++ b/python/podio/generator_utils.py @@ -4,6 +4,7 @@ """ import re +import json def _get_namespace_class(full_type): @@ -183,12 +184,19 @@ def setter_name(self, get_syntax, is_relation=False): return self.name return _prefix_name(self.name, 'set') + def _to_json(self): + """Return a string representation that can be parsed again.""" + # The __str__ method is geared towards c++ too much, so we have to build + # things again here from available information + def_val = f'{{{self.default_val}}}' if self.default_val else '' + description = f' // {self.description}' if self.description else '' + return f'{self.full_type} {self.name}{def_val}{description}' + class DataModel: # pylint: disable=too-few-public-methods """A class for holding a complete datamodel read from a configuration file""" + def __init__(self, datatypes=None, components=None, options=None): - self.datatypes = datatypes or {} - self.components = components or {} self.options = options or { # should getters / setters be prefixed with get / set? "getSyntax": False, @@ -197,3 +205,22 @@ def __init__(self, datatypes=None, components=None, options=None): # use subfolder when including package header files "includeSubfolder": False, } + self.components = components or {} + self.datatypes = datatypes or {} + + def _to_json(self): + """Return the dictionary, so that we can easily hook this into the pythons + JSON ecosystem""" + return self.__dict__ + + +class DataModelJSONEncoder(json.JSONEncoder): + """A JSON encoder for DataModels, resp. 
anything hat has a _to_json method.""" + + def default(self, o): + """The override for the default, first trying to call _to_json, otherwise + handing off to the default JSONEncoder""" + try: + return o._to_json() # pylint: disable=protected-access + except AttributeError: + return super().default(o) diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index a72bcce06..3992a3aa9 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -407,24 +407,21 @@ def _read_datatype(cls, value): return datatype @classmethod - def read(cls, yamlfile, package_name, upstream_edm=None): - """Read the datamodel definition from the yamlfile.""" - with open(yamlfile, "r", encoding='utf-8') as stream: - content = yaml.load(stream, yaml.SafeLoader) - + def parse_model(cls, model_dict, package_name, upstream_edm=None): + """Parse a model from the dictionary, e.g. read from a yaml file.""" components = {} - if "components" in content: - for klassname, value in content["components"].items(): + if "components" in model_dict: + for klassname, value in model_dict["components"].items(): components[klassname] = cls._read_component(value) datatypes = {} - if "datatypes" in content: - for klassname, value in content["datatypes"].items(): + if "datatypes" in model_dict: + for klassname, value in model_dict["datatypes"].items(): datatypes[klassname] = cls._read_datatype(value) options = copy.deepcopy(cls.options) - if "options" in content: - for option, value in content["options"].items(): + if "options" in model_dict: + for option, value in model_dict["options"].items(): options[option] = value # Normalize the includeSubfoler internally already here @@ -438,3 +435,11 @@ def read(cls, yamlfile, package_name, upstream_edm=None): datamodel = DataModel(datatypes, components, options) validator.validate(datamodel, upstream_edm) return datamodel + + @classmethod + def read(cls, yamlfile, package_name, upstream_edm=None): + """Read 
the datamodel definition from the yamlfile.""" + with open(yamlfile, "r", encoding='utf-8') as stream: + content = yaml.load(stream, yaml.SafeLoader) + + return cls.parse_model(content, package_name, upstream_edm) diff --git a/python/podio/test_DataModelJSONEncoder.py b/python/podio/test_DataModelJSONEncoder.py new file mode 100644 index 000000000..b63ff22f0 --- /dev/null +++ b/python/podio/test_DataModelJSONEncoder.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +"""Unit tests for the JSON encoding of data models""" + +import unittest + +from podio.generator_utils import DataModelJSONEncoder +from podio.podio_config_reader import MemberParser + + +def get_member_var_json(string): + """Get a MemberVariable encoded as JSON from the passed string. + + Passes through the whole chain of parsing and JSON encoding, as it is done + during data model encoding. + + Args: + string (str): The member variable definition as a string. NOTE: here it is + assumed that this is a valid string that can be parsed. 
+ + Returns: + str: The json encoded member variable + """ + parser = MemberParser() + member_var = parser.parse(string, False) # be lenient with missing descriptions + return DataModelJSONEncoder().encode(member_var).strip('"') # strip quotes from JSON + + +class DataModelJSONEncoderTest(unittest.TestCase): + """Unit tests for the DataModelJSONEncoder and the utility functionality in MemberVariable""" + + def test_encode_only_types(self): + """Test that encoding works for type declarations only""" + for mdef in (r"float someFloat", + r"ArbitraryType name", + r"std::int16_t fixedWidth", + r"namespace::Type type"): + self.assertEqual(get_member_var_json(mdef), mdef) + + # Fixed with without std are encoded with std namespace + fixed_w = r"int32_t fixedWidth" + self.assertEqual(get_member_var_json(fixed_w), f"std::{fixed_w}") + + def test_encode_array_types(self): + """Test that encoding array member variable declarations work""" + for mdef in (r"std::array anArray", + r"std::array fwArr", + r"std::array typeArr", + r"std::array namespacedTypeArr"): + self.assertEqual(get_member_var_json(mdef), mdef) + + def test_encode_default_vals(self): + """Test that encoding definitions with default values works""" + for mdef in (r"int i{42}", + r"std::uint32_t uint{64}", + r"ArbType a{123}", + r"namespace::Type t{whatever}", + r"std::array fs{3.14f, 6.28f}", + r"std::array typeArr{1, 2, 3}"): + self.assertEqual(get_member_var_json(mdef), mdef) + + def test_encode_with_description(self): + """Test that encoding definitions that contain a description works""" + for mdef in (r"int i // an unitialized int", + r"std::uint32_t ui{42} // an initialized unsigned int", + r"std::array fs // a float array", + r"std::array tA{1, 2, 3} // an initialized array of namespaced types", + r"AType type // a very special type", + r"nsp::Type nspT // a namespaced type", + r"nsp::Type nspT{with init} // an initialized namespaced type", + r"ArbitratyType arbT{42} // an initialized type"): + 
self.assertEqual(get_member_var_json(mdef), mdef) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 7c6fc5b40..ec015230c 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -17,7 +17,7 @@ import jinja2 from podio.podio_config_reader import PodioConfigReader -from podio.generator_utils import DataType, DefinitionError +from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder THIS_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates') @@ -113,6 +113,8 @@ def process(self): for name, datatype in self.datamodel.datatypes.items(): self._process_datatype(name, datatype) + self._write_edm_def_file() + if 'ROOT' in self.io_handlers: self._create_selection_xml() self.print_report() @@ -203,6 +205,9 @@ def _fill_templates(self, template_base, data): def _process_component(self, name, component): """Process one component""" + # Make a copy here and add the preprocessing steps to that such that the + # original definition can be left untouched + component = deepcopy(component) includes = set() includes.update(*(m.includes for m in component['Members'])) @@ -368,6 +373,18 @@ def _preprocess_datatype(self, name, definition): return data + def _write_edm_def_file(self): + """Write the edm definition to a compile time string""" + model_encoder = DataModelJSONEncoder() + data = { + 'package_name': self.package_name, + 'edm_definition': model_encoder.encode(self.datamodel), + 'incfolder': self.incfolder, + } + + self._write_file('DatamodelDefinition.h', + self._eval_template('DatamodelDefinition.h.jinja2', data)) + def _get_member_includes(self, members): """Process all members and gather the necessary includes""" includes = set() diff --git a/python/templates/CMakeLists.txt b/python/templates/CMakeLists.txt index c3c382ad5..be5f4b307 100644 --- a/python/templates/CMakeLists.txt +++ b/python/templates/CMakeLists.txt @@ -14,6 +14,7 @@ 
set(PODIO_TEMPLATES ${CMAKE_CURRENT_LIST_DIR}/selection.xml.jinja2 ${CMAKE_CURRENT_LIST_DIR}/SIOBlock.cc.jinja2 ${CMAKE_CURRENT_LIST_DIR}/SIOBlock.h.jinja2 + ${CMAKE_CURRENT_LIST_DIR}/DatamodelDefinition.h.jinja2 ${CMAKE_CURRENT_LIST_DIR}/macros/collections.jinja2 ${CMAKE_CURRENT_LIST_DIR}/macros/declarations.jinja2 ${CMAKE_CURRENT_LIST_DIR}/macros/implementations.jinja2 diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index c265c29a7..8c121de20 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -4,6 +4,7 @@ // AUTOMATICALLY GENERATED FILE - DO NOT EDIT #include "{{ incfolder }}{{ class.bare_type }}Collection.h" +#include "{{ incfolder }}DatamodelDefinition.h" {% for include in includes_coll_cc %} {{ include }} @@ -178,6 +179,10 @@ podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { {{ macros.vectorized_access(class, member) }} {% endfor %} +size_t {{ collection_type }}::getDatamodelRegistryIndex() const { + return {{ package_name }}::meta::DatamodelRegistryIndex::value(); +} + #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { j = nlohmann::json::array(); diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 049ecff79..2c1a80e3b 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -130,6 +130,8 @@ public: return m_isValid; } + size_t getDatamodelRegistryIndex() const final; + // support for the iterator protocol iterator begin() { return iterator(0, &m_storage.entries); diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 new file mode 100644 index 000000000..17a300cb9 --- /dev/null +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -0,0 +1,30 @@ +// AUTOMATICALLY GENERATED FILE - DO NOT EDIT + +#include "podio/DatamodelRegistry.h" + +namespace {{ package_name 
}}::meta { +/** + * The complete definition of the datamodel at generation time in JSON format. + */ +static constexpr auto {{ package_name }}__JSONDefinition = R"DATAMODELDEF({{ edm_definition }})DATAMODELDEF"; + +/** + * The helper class that takes care of registering the datamodel definition to + * the DatamodelRegistry and to provide the index in that registry. + * + * Implemented as a singleton mainly to ensure only a single registration of + * each datamodel, during the constructor + */ +class DatamodelRegistryIndex { +public: + static size_t value() { + static auto index = DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel("{{ package_name }}", {{ package_name }}__JSONDefinition)); + return index.m_value; + } + +private: + DatamodelRegistryIndex(size_t v) : m_value(v) {} + size_t m_value{podio::DatamodelRegistry::NoDefinitionAvailable}; +}; + +} // namespace {{ package_name }}::meta diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d0017cba7..589c073a6 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,7 +48,10 @@ SET(core_sources CollectionIDTable.cc GenericParameters.cc ASCIIWriter.cc - EventStore.cc) + EventStore.cc + DatamodelRegistry.cc + DatamodelRegistryIOHelpers.cc + ) SET(core_headers ${CMAKE_SOURCE_DIR}/include/podio/CollectionBase.h @@ -59,6 +62,8 @@ SET(core_headers ${CMAKE_SOURCE_DIR}/include/podio/ObjectID.h ${CMAKE_SOURCE_DIR}/include/podio/UserDataCollection.h ${CMAKE_SOURCE_DIR}/include/podio/podioVersion.h + ${CMAKE_SOURCE_DIR}/include/podio/DatamodelRegistry.h + ${CMAKE_SOURCE_DIR}/include/podio/utilities/DatamodelRegistryIOHelpers.h ) PODIO_ADD_LIB_AND_DICT(podio "${core_headers}" "${core_sources}" selection.xml) diff --git a/src/DatamodelRegistry.cc b/src/DatamodelRegistry.cc new file mode 100644 index 000000000..d5a96e364 --- /dev/null +++ b/src/DatamodelRegistry.cc @@ -0,0 +1,63 @@ +#include "podio/DatamodelRegistry.h" + +#include +#include +#include +#include + +namespace podio { 
+const DatamodelRegistry& DatamodelRegistry::instance() { + return mutInstance(); +} + +DatamodelRegistry& DatamodelRegistry::mutInstance() { + static DatamodelRegistry registryInstance; + return registryInstance; +} + +size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view definition) { + const auto it = std::find_if(m_definitions.cbegin(), m_definitions.cend(), + [&name](const auto& kvPair) { return kvPair.first == name; }); + + if (it == m_definitions.cend()) { + int index = m_definitions.size(); + m_definitions.emplace_back(name, definition); + return index; + } + + // TODO: Output? + return std::distance(m_definitions.cbegin(), it); +} + +const std::string_view DatamodelRegistry::getDatamodelDefinition(std::string_view name) const { + const auto it = std::find_if(m_definitions.cbegin(), m_definitions.cend(), + [&name](const auto& kvPair) { return kvPair.first == name; }); + if (it == m_definitions.cend()) { + std::cerr << "PODIO WARNING: Cannot find the definition for the EDM with the name " << name << std::endl; + static constexpr std::string_view emptyDef = "{}"; // valid empty JSON + return emptyDef; + } + + return it->second; +} + +const std::string_view DatamodelRegistry::getDatamodelDefinition(size_t index) const { + if (index >= m_definitions.size()) { + std::cerr << "PODIO WARNING: Cannot find the definition for the EDM with the index " << index << std::endl; + static constexpr std::string_view emptyDef = "{}"; // valid empty JSON + return emptyDef; + } + + return m_definitions[index].second; +} + +const std::string& DatamodelRegistry::getDatamodelName(size_t index) const { + if (index >= m_definitions.size()) { + std::cout << "PODIO WARNING: Cannot find the name of the EDM with the index " << index << std::endl; + static const std::string emptyName = ""; + return emptyName; + } + return m_definitions[index].first; +} + +} // namespace podio diff --git a/src/DatamodelRegistryIOHelpers.cc b/src/DatamodelRegistryIOHelpers.cc new 
file mode 100644 index 000000000..901dbb113 --- /dev/null +++ b/src/DatamodelRegistryIOHelpers.cc @@ -0,0 +1,49 @@ +#include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include + +namespace podio { + +void DatamodelDefinitionCollector::registerDatamodelDefinition(const podio::CollectionBase* coll, + const std::string& name) { + const auto edmIndex = coll->getDatamodelRegistryIndex(); + if (edmIndex == DatamodelRegistry::NoDefinitionAvailable) { + std::cerr << "No EDM definition available for collection " << name << std::endl; + } else { + if (edmIndex != DatamodelRegistry::NoDefinitionNecessary) { + m_edmDefRegistryIdcs.insert(edmIndex); + } + } +} + +std::vector> DatamodelDefinitionCollector::getDatamodelDefinitionsToWrite() const { + std::vector> edmDefinitions; + edmDefinitions.reserve(m_edmDefRegistryIdcs.size()); + for (const auto& index : m_edmDefRegistryIdcs) { + const auto& edmRegistry = podio::DatamodelRegistry::instance(); + edmDefinitions.emplace_back(edmRegistry.getDatamodelName(index), edmRegistry.getDatamodelDefinition(index)); + } + + return edmDefinitions; +} + +const std::string_view DatamodelDefinitionHolder::getDatamodelDefinition(const std::string& name) const { + const auto it = std::find_if(m_availEDMDefs.cbegin(), m_availEDMDefs.cend(), + [&name](const auto& entry) { return std::get<0>(entry) == name; }); + + if (it != m_availEDMDefs.cend()) { + return std::get<1>(*it); + } + + return "{}"; +} + +std::vector DatamodelDefinitionHolder::getAvailableDatamodels() const { + std::vector defs{}; + defs.reserve(m_availEDMDefs.size()); + std::transform(m_availEDMDefs.cbegin(), m_availEDMDefs.cend(), std::back_inserter(defs), + [](const auto& elem) { return std::get<0>(elem); }); + + return defs; +} + +} // namespace podio diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index e3d6c6aba..f8880133c 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -179,6 +179,7 @@ std::vector getAvailableCategories(TChain* metaChain) { 
auto* branches = metaChain->GetListOfBranches(); std::vector brNames; brNames.reserve(branches->GetEntries()); + for (int i = 0; i < branches->GetEntries(); ++i) { const std::string name = branches->At(i)->GetName(); const auto fUnder = name.find("___"); @@ -189,7 +190,6 @@ std::vector getAvailableCategories(TChain* metaChain) { std::sort(brNames.begin(), brNames.end()); brNames.erase(std::unique(brNames.begin(), brNames.end()), brNames.end()); - return brNames; } @@ -217,6 +217,14 @@ void ROOTFrameReader::openFiles(const std::vector& filenames) { m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; delete versionPtr; + if (auto* edmDefBranch = root_utils::getBranch(m_metaChain.get(), root_utils::edmDefBranchName)) { + auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{}; + edmDefBranch->SetAddress(&datamodelDefs); + edmDefBranch->GetEntry(0); + m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs)); + delete datamodelDefs; + } + // Do some work up front for setting up categories and setup all the chains // and record the available categories. 
The rest of the setup follows on // demand when the category is first read diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index d98d6763a..3f552d69f 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -35,6 +35,8 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c for (const auto& name : catInfo.collsToWrite) { auto* coll = frame.getCollectionForWrite(name); collections.emplace_back(name, const_cast(coll)); + + m_datamodelCollector.registerDatamodelDefinition(coll, name); } // We will at least have a parameters branch, even if there are no @@ -129,6 +131,9 @@ void ROOTFrameWriter::finish() { auto podioVersion = podio::version::build_version; metaTree->Branch(root_utils::versionBranchName, &podioVersion); + auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); + metaTree->Branch(root_utils::edmDefBranchName, &edmDefinitions); + metaTree->Fill(); m_file->Write(); diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 6c7a95ba6..c0a514e6a 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -49,41 +49,19 @@ void SIOCollectionIDTableBlock::write(sio::write_device& device) { device.data(_isSubsetColl); } -template -void writeParamMap(sio::write_device& device, const GenericParameters::MapType& map) { - device.data((int)map.size()); - for (const auto& [key, value] : map) { - device.data(key); - device.data(value); - } -} - -template -void readParamMap(sio::read_device& device, GenericParameters::MapType& map) { - int size; - device.data(size); - while (size--) { - std::string key; - device.data(key); - std::vector values; - device.data(values); - map.emplace(std::move(key), std::move(values)); - } -} - void writeGenericParameters(sio::write_device& device, const GenericParameters& params) { - writeParamMap(device, params.getIntMap()); - writeParamMap(device, params.getFloatMap()); - writeParamMap(device, params.getStringMap()); - writeParamMap(device, params.getDoubleMap()); + 
writeMapLike(device, params.getIntMap()); + writeMapLike(device, params.getFloatMap()); + writeMapLike(device, params.getStringMap()); + writeMapLike(device, params.getDoubleMap()); } void readGenericParameters(sio::read_device& device, GenericParameters& params, sio::version_type version) { - readParamMap(device, params.getIntMap()); - readParamMap(device, params.getFloatMap()); - readParamMap(device, params.getStringMap()); + readMapLike(device, params.getIntMap()); + readMapLike(device, params.getFloatMap()); + readMapLike(device, params.getStringMap()); if (version >= sio::version::encode_version(0, 2)) { - readParamMap(device, params.getDoubleMap()); + readMapLike(device, params.getDoubleMap()); } } @@ -148,7 +126,7 @@ SIOBlockLibraryLoader::SIOBlockLibraryLoader() { const auto status = loadLib(lib); switch (status) { case LoadStatus::Success: - std::cout << "Loaded SIOBlocks library \'" << lib << "\' (from " << dir << ")" << std::endl; + std::cerr << "Loaded SIOBlocks library \'" << lib << "\' (from " << dir << ")" << std::endl; break; case LoadStatus::AlreadyLoaded: std::cerr << "SIOBlocks library \'" << lib << "\' already loaded. Not loading again from " << dir << std::endl; diff --git a/src/SIOFrameReader.cc b/src/SIOFrameReader.cc index 47f5ec082..0997ae8dc 100644 --- a/src/SIOFrameReader.cc +++ b/src/SIOFrameReader.cc @@ -6,6 +6,7 @@ #include #include +#include #include namespace podio { @@ -23,6 +24,7 @@ void SIOFrameReader::openFile(const std::string& filename) { // NOTE: reading TOC record first because that jumps back to the start of the file! 
readFileTOCRecord(); readPodioHeader(); + readEDMDefinitions(); // Potentially could do this lazily } std::unique_ptr SIOFrameReader::readNextEntry(const std::string& name) { @@ -54,7 +56,13 @@ std::unique_ptr SIOFrameReader::readEntry(const std::string& name, } std::vector SIOFrameReader::getAvailableCategories() const { - return m_tocRecord.getRecordNames(); + // Filter the available records from the TOC to remove records that are + // stored, but use reserved record names for podio meta data + auto recordNames = m_tocRecord.getRecordNames(); + recordNames.erase(std::remove_if(recordNames.begin(), recordNames.end(), + [](const auto& elem) { return elem == sio_helpers::SIOEDMDefinitionName; }), + recordNames.end()); + return recordNames; } unsigned SIOFrameReader::getEntries(const std::string& name) const { @@ -101,4 +109,22 @@ void SIOFrameReader::readPodioHeader() { m_fileVersion = static_cast(blocks[0].get())->version; } +void SIOFrameReader::readEDMDefinitions() { + const auto recordPos = m_tocRecord.getPosition(sio_helpers::SIOEDMDefinitionName); + if (recordPos == 0) { + // No EDM definitions found + return; + } + m_stream.seekg(recordPos); + + const auto& [buffer, _] = sio_utils::readRecord(m_stream); + + sio::block_list blocks; + blocks.emplace_back(std::make_shared>()); + sio::api::read_blocks(buffer.span(), blocks); + + auto datamodelDefs = static_cast*>(blocks[0].get()); + m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData)); +} + } // namespace podio diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc index f33bdbccc..360c948d2 100644 --- a/src/SIOFrameWriter.cc +++ b/src/SIOFrameWriter.cc @@ -8,6 +8,7 @@ #include "sioUtils.h" #include +#include namespace podio { @@ -35,6 +36,7 @@ void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& ca collections.reserve(collsToWrite.size()); for (const auto& name : collsToWrite) { collections.emplace_back(name, frame.getCollectionForWrite(name)); +
m_datamodelCollector.registerDatamodelDefinition(collections.back().second, name); } // Write necessary metadata and the actual data into two different records. @@ -49,7 +51,14 @@ void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& ca } void SIOFrameWriter::finish() { + auto edmDefMap = std::make_shared>( + m_datamodelCollector.getDatamodelDefinitionsToWrite()); + sio::block_list blocks; + blocks.push_back(edmDefMap); + m_tocRecord.addRecord(sio_helpers::SIOEDMDefinitionName, sio_utils::writeRecord(blocks, "EDMDefinitions", m_stream)); + + blocks.clear(); blocks.emplace_back(std::make_shared(&m_tocRecord)); auto tocStartPos = sio_utils::writeRecord(blocks, sio_helpers::SIOTocRecordName, m_stream); diff --git a/src/rootUtils.h b/src/rootUtils.h index 5bce3d702..215c7fea6 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -7,6 +7,7 @@ #include "podio/CollectionIDTable.h" #include "TBranch.h" +#include "TChain.h" #include "TClass.h" #include "TTree.h" @@ -35,6 +36,12 @@ constexpr static auto paramBranchName = "PARAMETERS"; */ constexpr static auto versionBranchName = "PodioBuildVersion"; +/** + * The name of the branch in which all the EDM names and their definitions are + * stored in the meta data tree. 
+ */ +constexpr static auto edmDefBranchName = "EDMDefinitions"; + /** * Name of the branch for storing the idTable for a given category in the meta * data tree diff --git a/src/selection.xml b/src/selection.xml index 3c0be36e3..d198bfab6 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -15,6 +15,7 @@ + diff --git a/src/sioUtils.h b/src/sioUtils.h index 6e340d8fa..204867eaf 100644 --- a/src/sioUtils.h +++ b/src/sioUtils.h @@ -9,6 +9,7 @@ #include #include +#include #include namespace podio { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ad24cbf31..2b056bc74 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -73,7 +73,7 @@ endforeach() if (NOT DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR} OR NOT EXISTS ${PODIO_TEST_INPUT_DATA_DIR}/example.root) message("Getting test input files") execute_process( - COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/get_test_inputs.sh + COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/scripts/get_test_inputs.sh OUTPUT_VARIABLE podio_test_input_data_dir RESULT_VARIABLE test_inputs_available ) @@ -245,3 +245,47 @@ else() LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} ) endif() + +# Add tests for storing and retrieving the EDM definitions into the produced +# files +add_test(datamodel_def_store_roundtrip_root ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root datamodel) +add_test(datamodel_def_store_roundtrip_root_extension ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root datamodel extension_datamodel) + + +# Need the input files that are produced by other tests +set_tests_properties( + datamodel_def_store_roundtrip_root + datamodel_def_store_roundtrip_root_extension + PROPERTIES + DEPENDS write_frame_root + ) + +set(sio_roundtrip_tests "") +if (TARGET read_sio) + add_test(datamodel_def_store_roundtrip_sio ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh 
${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio datamodel) + add_test(datamodel_def_store_roundtrip_sio_extension ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio datamodel extension_datamodel) + + set(sio_roundtrip_tests + datamodel_def_store_roundtrip_sio + datamodel_def_store_roundtrip_sio_extension + ) + + set_tests_properties( + ${sio_roundtrip_tests} + PROPERTIES + DEPENDS write_frame_sio + ) +endif() + +# We need to convert this into a list of arguments that can be used as environment variable +list(JOIN PODIO_IO_HANDLERS " " IO_HANDLERS) + +set_tests_properties( + datamodel_def_store_roundtrip_root + datamodel_def_store_roundtrip_root_extension + ${sio_roundtrip_tests} + PROPERTIES + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ENVIRONMENT + "PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH}" + ) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 6d1e4e165..71812378a 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -52,6 +52,11 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ podio-dump-sio podio-dump-detailed-sio podio-dump-detailed-sio-legacy + + datamodel_def_store_roundtrip_root + datamodel_def_store_roundtrip_root_extension + datamodel_def_store_roundtrip_sio + datamodel_def_store_roundtrip_sio_extension ) # ostream_operator is working with Memory sanitizer (at least locally) diff --git a/tests/scripts/dumpModelRoundTrip.sh b/tests/scripts/dumpModelRoundTrip.sh new file mode 100755 index 000000000..9f9bc2148 --- /dev/null +++ b/tests/scripts/dumpModelRoundTrip.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Script to check that an EDM definition dumped from a file is 
"equivalent" to +# the original definition. Essentially does not check that the YAML file is the +# same, but rather that the generated code is the same + +set -eu + +INPUT_FILE=${1} # the datafile +EDM_NAME=${2} # the name of the EDM +COMP_BASE_FOLDER="" # where the source to compare against is +if [ -$# -gt 2 ]; then + COMP_BASE_FOLDER=${3} +fi + +# Create a few temporary but unique files and directories to store output +DUMPED_MODEL=${INPUT_FILE}.dumped_${EDM_NAME}.yaml +OUTPUT_FOLDER=${INPUT_FILE}.dumped_${EDM_NAME} +mkdir -p ${OUTPUT_FOLDER} + +# Dump the model to a yaml file +${PODIO_BASE}/tools/podio-dump --dump-edm ${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} + +# Regenerate the code via the class generator and the freshly dumped modl +${PODIO_BASE}/python/podio_class_generator.py \ + --clangformat \ + ${DUMPED_MODEL} \ + ${OUTPUT_FOLDER} \ + ${EDM_NAME} \ + ${IO_HANDLERS} + +# Compare to the originally generated code, that has been used to write the data +# file. Need to diff subfolders explitly here because $PODIO_BASE/tests contains +# more stuff +diff -ru ${OUTPUT_FOLDER}/${EDM_NAME} ${PODIO_BASE}/tests/${COMP_BASE_FOLDER}/${EDM_NAME} +diff -ru ${OUTPUT_FOLDER}/src ${PODIO_BASE}/tests/${COMP_BASE_FOLDER}/src +diff -u ${OUTPUT_FOLDER}/podio_generated_files.cmake ${PODIO_BASE}/tests/podio_generated_files.cmake diff --git a/tests/get_test_inputs.sh b/tests/scripts/get_test_inputs.sh similarity index 100% rename from tests/get_test_inputs.sh rename to tests/scripts/get_test_inputs.sh diff --git a/tools/podio-dump b/tools/podio-dump index efb5dfc18..8685aa19e 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -2,6 +2,8 @@ """podio-dump tool to dump contents of podio files""" import sys +import json +import yaml from podio.reading import get_reader @@ -15,9 +17,11 @@ def print_general_info(reader, filename): Args: reader (root_io.Reader, sio_io.Reader): An initialized reader """ - print(f'input file: {filename}\n') legacy_text = ' (this is a legacy 
file!)' if reader.is_legacy else '' - print(f'Frame categories in this file{legacy_text}:') + print(f'input file: {filename}{legacy_text}\n') + print(f'datamodel model definitions stored in this file: {", ".join(reader.datamodel_definitions)}') + print() + print('Frame categories in this file:') print(f'{"Name":<20} {"Entries":<10}') print('-' * 31) for category in reader.categories: @@ -68,6 +72,18 @@ def print_frame(frame, cat_name, ientry, detailed): print('\n', flush=True) +def dump_model(reader, model_name): + """Dump the model in yaml format""" + if model_name not in reader.datamodel_definitions: + print(f'ERROR: Cannot dump model \'{model_name}\' (not present in file)') + return False + + model_def = json.loads(reader.get_datamodel_definition(model_name)) + print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) + + return True + + def main(args): """Main""" try: @@ -76,6 +92,12 @@ def main(args): print(f'ERROR: Cannot open file \'{args.inputfile}\': {err}') sys.exit(1) + if args.dump_edm is not None: + if dump_model(reader, args.dump_edm): + sys.exit(0) + else: + sys.exit(1) + print_general_info(reader, args.inputfile) if args.category not in reader.categories: print(f'ERROR: Cannot print category \'{args.category}\' (not present in file)') @@ -120,6 +142,9 @@ if __name__ == '__main__': type=parse_entry_range, default=[0]) parser.add_argument('-d', '--detailed', help='Dump the full contents not just the collection info', action='store_true', default=False) + parser.add_argument('--dump-edm', + help='Dump the specified EDM definition from the file in yaml format', + type=str, default=None) clargs = parser.parse_args() main(clargs) From 2d3b3a2b10a6ea8b541010241f1bfd2c8d0f65fe Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Mon, 13 Mar 2023 16:45:47 +0100 Subject: [PATCH 046/100] Add .cache to the gitignore (#389) Co-authored-by: jmcarcell --- .gitignore | 1 + 1 file changed, 1 
insertion(+) diff --git a/.gitignore b/.gitignore index cb2170b29..b665c600f 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ spack* # Tooling /.clangd/ /compile_commands.json +/.cache/ # Generated files *podio_generated_files.cmake From 9ef4d3ce8573a2512b25ce635bc78a016f5e05c3 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 14 Mar 2023 14:46:14 +0100 Subject: [PATCH 047/100] Check for all the arguments that are going to be used for clang-format (#390) * Modify initial clang check * Test for the existence of .clang-format * Fix linter --------- Co-authored-by: jmcarcell --- python/podio_class_generator.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index ec015230c..927ddfd1c 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -41,9 +41,17 @@ def get_clang_format(): """Check if clang format is available and if so get the list of arguments to invoke it via subprocess.Popen""" try: - cformat_exe = subprocess.check_output(['which', 'clang-format']).strip() - return [cformat_exe, "-style=file", "-fallback-style=llvm"] - except subprocess.CalledProcessError: + out = subprocess.check_output(["clang-format", "-style=file", "-fallback-style=llvm", "--help"], + stderr=subprocess.STDOUT) + if b'Unknown' in out: + print('ERROR: At least one argument was not recognized by clang-format') + print(' Most likely the version you are using is old') + return [] + out = subprocess.check_output('echo | clang-format -style=file ', stderr=subprocess.STDOUT, shell=True) + if b'.clang-format' in out: + return [] + return ["clang-format", "-style=file", "-fallback-style=llvm"] + except FileNotFoundError: print("ERROR: Cannot find clang-format executable") print(" Please make sure it is in the PATH.") return [] From ef2854fdc03b7d69500ceedaa351d0bd5561765b Mon Sep 17 00:00:00 2001 From: 
Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 14 Mar 2023 16:35:06 +0100 Subject: [PATCH 048/100] Fix exception handling in clang-format discovery (#391) Co-authored-by: jmcarcell --- python/podio_class_generator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 927ddfd1c..44408c915 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -41,12 +41,10 @@ def get_clang_format(): """Check if clang format is available and if so get the list of arguments to invoke it via subprocess.Popen""" try: + # This one can raise if -fallback-style is not found out = subprocess.check_output(["clang-format", "-style=file", "-fallback-style=llvm", "--help"], stderr=subprocess.STDOUT) - if b'Unknown' in out: - print('ERROR: At least one argument was not recognized by clang-format') - print(' Most likely the version you are using is old') - return [] + # This one doesn't raise out = subprocess.check_output('echo | clang-format -style=file ', stderr=subprocess.STDOUT, shell=True) if b'.clang-format' in out: return [] @@ -55,6 +53,10 @@ def get_clang_format(): print("ERROR: Cannot find clang-format executable") print(" Please make sure it is in the PATH.") return [] + except subprocess.CalledProcessError: + print('ERROR: At least one argument was not recognized by clang-format') + print(' Most likely the version you are using is old') + return [] def write_file_if_changed(filename, content, force_write=False): From ca53c0eb102690b35cb6c640df05f76159fa1b23 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 14 Mar 2023 16:53:42 +0100 Subject: [PATCH 049/100] Release Notes for v00-16-03 --- doc/ReleaseNotes.md | 77 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index f588fc895..829399724 100644 --- a/doc/ReleaseNotes.md +++ 
b/doc/ReleaseNotes.md @@ -1,3 +1,80 @@ +# v00-16-03 + +* 2023-03-14 jmcarcell ([PR#391](https://github.com/AIDASoft/podio/pull/391)) + - Catch an exception when a `clang-format` flag is not found + +* 2023-03-14 jmcarcell ([PR#390](https://github.com/AIDASoft/podio/pull/390)) + - Modify the initial `clang-format` check to try to run with all the arguments that will be used later + +* 2023-03-13 jmcarcell ([PR#389](https://github.com/AIDASoft/podio/pull/389)) + - Add .cache to the gitignore + +* 2023-03-07 Thomas Madlener ([PR#358](https://github.com/AIDASoft/podio/pull/358)) + - Embed the EDM definition in JSON format into the shared core datamodel libraries + - Generate an additional `DatamodelDefinition.h` header file containing the string literal json encoded definition + - Statically register this to the newly introduced `DatamodelRegistry` and make collections aware of which datamodel they belong to + - Collect all EDM definitions from all collections that are written with a writer and write all these definitions to the resulting file + - Currently only done for the `FrameWriter`s + - Give `podio-dump` the necessary functionality to retrieve the stored models and dump them in YAML format again + - Add roundtrip tests that compare the generated code from the original model and the one that has been dumped from a data file to ensure that all components work as intended. + - See the [advanced topics documentation](https://github.com/tmadlener/podio/blob/store-model-def/doc/advanced_topics.md#retrieving-the-edm-definition-from-a-data-file) for more details. 
+ +* 2023-03-06 Dmitry Kalinkin ([PR#384](https://github.com/AIDASoft/podio/pull/384)) + - Added an operator for conversion to std::string for podio::version::Version + +* 2023-03-01 Thomas Madlener ([PR#378](https://github.com/AIDASoft/podio/pull/378)) + - Introduce deprecation warnings for the **EventStore based I/O model** as it **will be removed in favor of the `Frame` based one** + +* 2023-03-01 Thomas Madlener ([PR#372](https://github.com/AIDASoft/podio/pull/372)) + - Make `double` a supported type of `GenericParameters`. A similar thing has been added to LCIO in [iLCSoft/LCIO#143](https://github.com/iLCSoft/LCIO/pull/143) to support storing event weights that need double precision. + - Add more unittests to the `GenericParameters` covering also the available constructors. + +* 2023-02-27 Thomas Madlener ([PR#380](https://github.com/AIDASoft/podio/pull/380)) + - Add `getParameters` method to the `Frame` and deprecate `getGenericParametersForWrite` which offered the exact same functionality. + - Make it easily possible to get all parameters that are currently stored in a Frame via an "official" channel + - Replace all internal usages. + - Add a `getParameterKeys` templated method to get the keys for different parameter types that are currently stored in the Frame. + +* 2023-02-22 jmcarcell ([PR#377](https://github.com/AIDASoft/podio/pull/377)) + - Add a visualization tool that converts a YAML description to a graph + +* 2023-02-21 jmcarcell ([PR#376](https://github.com/AIDASoft/podio/pull/376)) + - Fix tests without SIO + +* 2023-02-14 Thomas Madlener ([PR#375](https://github.com/AIDASoft/podio/pull/375)) + - Fix the `PODIO_VERSION` preprocessor macro to be actually usable in a preprocessor context. 
Fixes [#374](https://github.com/AIDASoft/podio/issues/374) + - Make `podio_VERSION` preprocessor constant something that can be used in a preprocessor context (now the same as `PODIO_BUILD_VERSION`) + - Add test that ensures that the macro and the constant are actually used in a preprocessor context. + +* 2023-02-13 Juraj Smiesko ([PR#373](https://github.com/AIDASoft/podio/pull/373)) + - Adding ID to the short podio-dump output + +* 2023-02-06 Nathan Brei ([PR#369](https://github.com/AIDASoft/podio/pull/369)) + - Mark non-templated definitions of `Frame::Frame`, `Frame::get`, `Frame::put` and `Frame::putParameters` as `inline` to fix linker errors. + +* 2023-02-02 jmcarcell ([PR#364](https://github.com/AIDASoft/podio/pull/364)) + - Make workflows not trigger twice on pushes to PRs + +* 2023-01-26 jmcarcell ([PR#368](https://github.com/AIDASoft/podio/pull/368)) + - CMAKE: Add option PODIO_RELAX_PYVER to allow relaxing the required match of python version with the one that ROOT has been built with to only check major and minor versions + +* 2023-01-16 Thomas Madlener ([PR#363](https://github.com/AIDASoft/podio/pull/363)) + - Move sio utility functionality defined in `SIOFrameWriter.cc` to private `sioUtils.h` header and use it also in the legacy `SIOWriter`. + - Fix cmake configure dependencies (missed in #343) for datamodel generation macro. + - Use `defaultdict` instead of hand rolling one in class generator. + +* 2023-01-16 Thomas Madlener ([PR#361](https://github.com/AIDASoft/podio/pull/361)) + - Add basic I/O tests for datatypes defined in the extension datamodel.
Fixes #319 + +* 2023-01-11 jmcarcell ([PR#355](https://github.com/AIDASoft/podio/pull/355)) + - Change the readers so that when the file is missing they won't crash + +* 2023-01-10 jmcarcell ([PR#365](https://github.com/AIDASoft/podio/pull/365)) + - Fix the pre-commit workflow + +* 2022-12-23 jmcarcell ([PR#362](https://github.com/AIDASoft/podio/pull/362)) + - Rename the variable `match` to avoid collisions with a python keyword from Python 3.10 onwards + # v00-16-02 * 2022-12-19 Thomas Madlener ([PR#360](https://github.com/AIDASoft/podio/pull/360)) From 9350b28f85539039025469f58287f308932f4bd7 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 14 Mar 2023 16:53:43 +0100 Subject: [PATCH 050/100] Updating version to v00-16-03 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e884a384d..3730ab000 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 2 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 3 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From c7328d62f17061c8360eb2ac0a3fe0ce931fd2ae Mon Sep 17 00:00:00 2001 From: hegner Date: Wed, 15 Mar 2023 15:51:56 +0100 Subject: [PATCH 051/100] add stripped down schema evolution (#341) * add stripped down schema evolution; relying on ROOT for the moment --------- Co-authored-by: Thomas Madlener --- .github/scripts/pylint.rc | 8 +- .gitignore | 1 + CMakeLists.txt | 4 +- cmake/podioMacros.cmake | 16 +- include/podio/ASCIIWriter.h | 1 + include/podio/CollectionBase.h | 9 + include/podio/CollectionBuffers.h | 3 + include/podio/ROOTFrameWriter.h | 2 +- include/podio/ROOTLegacyReader.h | 2 +- include/podio/ROOTReader.h | 2 +- 
include/podio/SchemaEvolution.h | 14 + include/podio/UserDataCollection.h | 11 + python/CMakeLists.txt | 1 + python/podio/generator_utils.py | 7 +- python/podio/podio_config_reader.py | 10 +- python/podio_class_generator.py | 58 ++- python/podio_schema_evolution.py | 370 ++++++++++++++++++ python/templates/Collection.cc.jinja2 | 10 + python/templates/Collection.h.jinja2 | 7 + .../schemaevolution/EvolvePOD.h.jinja2 | 0 python/templates/selection.xml.jinja2 | 5 + src/ROOTFrameReader.cc | 2 +- src/ROOTFrameWriter.cc | 3 +- src/ROOTLegacyReader.cc | 2 +- src/ROOTReader.cc | 40 +- src/ROOTWriter.cc | 2 +- src/rootUtils.h | 12 +- src/selection.xml | 2 + tests/CMakeLists.txt | 5 +- tests/datalayout.yaml | 1 + tests/datalayout_old.yaml | 203 ++++++++++ tests/schema_evolution.yaml | 13 + 32 files changed, 784 insertions(+), 42 deletions(-) create mode 100644 include/podio/SchemaEvolution.h create mode 100755 python/podio_schema_evolution.py create mode 100644 python/templates/schemaevolution/EvolvePOD.h.jinja2 create mode 100755 tests/datalayout_old.yaml create mode 100644 tests/schema_evolution.yaml diff --git a/.github/scripts/pylint.rc b/.github/scripts/pylint.rc index c5c1e86b3..2db65ccd7 100644 --- a/.github/scripts/pylint.rc +++ b/.github/scripts/pylint.rc @@ -263,14 +263,14 @@ exclude-protected=_asdict,_fields,_replace,_source,_make [DESIGN] # Maximum number of arguments for function / method -max-args=8 +max-args=10 # Argument names that match this expression will be ignored. Default to name # with leading underscore ignored-argument-names=_.* # Maximum number of locals for function / method body -max-locals=20 +max-locals=25 # Maximum number of return / yield for function / method body max-returns=8 @@ -285,10 +285,10 @@ max-statements=50 max-parents=7 # Maximum number of attributes for a class (see R0902). -max-attributes=20 +max-attributes=25 # Minimum number of public methods for a class (see R0903). 
-min-public-methods=1 +min-public-methods=0 # Maximum number of public methods for a class (see R0904). max-public-methods=20 diff --git a/.gitignore b/.gitignore index b665c600f..22f536c88 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,7 @@ spack* # Tooling /.clangd/ /compile_commands.json +.vscode /.cache/ # Generated files diff --git a/CMakeLists.txt b/CMakeLists.txt index 3730ab000..e8a943b61 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,8 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) -SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 3 ) +SET( ${PROJECT_NAME}_VERSION_MINOR 17 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 0 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) diff --git a/cmake/podioMacros.cmake b/cmake/podioMacros.cmake index 0541317e6..bb217801c 100644 --- a/cmake/podioMacros.cmake +++ b/cmake/podioMacros.cmake @@ -119,6 +119,7 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF) # RETURN_HEADERS variable that will be filled with the list of created headers files: ${datamodel}/*.h # RETURN_SOURCES variable that will be filled with the list of created source files : src/*.cc # Parameters: +# OLD_DESCRIPTION OPTIONAL: The path to the yaml file describing a previous datamodel version # OUTPUT_FOLDER OPTIONAL: The folder in which the output files should be placed # Default is ${CMAKE_CURRENT_SOURCE_DIR} # UPSTREAM_EDM OPTIONAL: The upstream edm and its package name that are passed to the @@ -126,13 +127,14 @@ set_property(CACHE PODIO_USE_CLANG_FORMAT PROPERTY STRINGS AUTO ON OFF) # IO_BACKEND_HANDLERS OPTIONAL: The I/O backend handlers that should be generated. 
The list is # passed directly to podio_class_generator.py and validated there # Default is ROOT +# SCHEMA_EVOLUTION OPTIONAL: The path to the yaml file declaring the necessary schema evolution # ) # # Note that the create_${datamodel} target will always be called, but if the YAML_FILE has not changed # this is essentially a no-op, and should not cause re-compilation. #--------------------------------------------------------------------------------------------------- function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOURCES) - CMAKE_PARSE_ARGUMENTS(ARG "" "OUTPUT_FOLDER;UPSTREAM_EDM" "IO_BACKEND_HANDLERS" ${ARGN}) + CMAKE_PARSE_ARGUMENTS(ARG "" "OLD_DESCRIPTION;OUTPUT_FOLDER;UPSTREAM_EDM;SCHEMA_EVOLUTION" "IO_BACKEND_HANDLERS" ${ARGN}) IF(NOT ARG_OUTPUT_FOLDER) SET(ARG_OUTPUT_FOLDER ${CMAKE_CURRENT_SOURCE_DIR}) ENDIF() @@ -141,11 +143,21 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR SET(UPSTREAM_EDM_ARG "--upstream-edm=${ARG_UPSTREAM_EDM}") ENDIF() + SET(OLD_DESCRIPTION_ARG "") + IF (ARG_OLD_DESCRIPTION) + SET(OLD_DESCRIPTION_ARG "--old-description=${ARG_OLD_DESCRIPTION}") + ENDIF() + IF(NOT ARG_IO_BACKEND_HANDLERS) # At least build the ROOT selection.xml by default for now SET(ARG_IO_BACKEND_HANDLERS "ROOT") ENDIF() + SET(SCHEMA_EVOLUTION_ARG "") + IF (ARG_SCHEMA_EVOLUTION) + SET(SCHEMA_EVOLUTION_ARG "--evolution_file=${ARG_SCHEMA_EVOLUTION}") + ENDIF() + set(CLANG_FORMAT_ARG "") if (PODIO_USE_CLANG_FORMAT STREQUAL AUTO OR PODIO_USE_CLANG_FORMAT) find_program(CLANG_FORMAT_EXE NAMES "clang-format") @@ -189,7 +201,7 @@ function(PODIO_GENERATE_DATAMODEL datamodel YAML_FILE RETURN_HEADERS RETURN_SOUR message(STATUS "Creating '${datamodel}' datamodel") # we need to boostrap the data model, so this has to be executed in the cmake run execute_process( - COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} 
${datamodel} ${ARG_IO_BACKEND_HANDLERS} + COMMAND ${Python_EXECUTABLE} ${podio_PYTHON_DIR}/podio_class_generator.py ${CLANG_FORMAT_ARG} ${OLD_DESCRIPTION_ARG} ${SCHEMA_EVOLUTION_ARG} ${UPSTREAM_EDM_ARG} ${YAML_FILE} ${ARG_OUTPUT_FOLDER} ${datamodel} ${ARG_IO_BACKEND_HANDLERS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} RESULT_VARIABLE podio_generate_command_retval ) diff --git a/include/podio/ASCIIWriter.h b/include/podio/ASCIIWriter.h index 54fd2040b..e941bb6aa 100644 --- a/include/podio/ASCIIWriter.h +++ b/include/podio/ASCIIWriter.h @@ -2,6 +2,7 @@ #define PODIO_ASCIIWRITER_H #include "podio/EventStore.h" +#include "podio/SchemaEvolution.h" #include "podio/utilities/Deprecated.h" #include diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index fcb81401a..d2dc1a626 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -3,6 +3,7 @@ #include "podio/CollectionBuffers.h" #include "podio/ObjectID.h" +#include "podio/SchemaEvolution.h" #include #include @@ -49,6 +50,12 @@ class CollectionBase { /// Create (empty) collection buffers from which a collection can be constructed virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0; + /// Create (empty) collection buffers from which a collection can be constructed + /// Versioned to support schema evolution + virtual podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, + podio::Backend backend) /*const*/ + = 0; + /// check for validity of the container after read virtual bool isValid() const = 0; @@ -61,6 +68,8 @@ class CollectionBase { virtual std::string getValueTypeName() const = 0; /// fully qualified type name of stored POD elements - with namespace virtual std::string getDataTypeName() const = 0; + /// schema version of the collection + virtual SchemaVersionT getSchemaVersion() const = 0; /// destructor virtual ~CollectionBase() = default; diff --git a/include/podio/CollectionBuffers.h 
b/include/podio/CollectionBuffers.h index d69ff0288..80b94c6dd 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -2,6 +2,7 @@ #define PODIO_COLLECTIONBUFFERS_H #include "podio/ObjectID.h" +#include "podio/SchemaEvolution.h" #include #include @@ -41,7 +42,9 @@ struct CollectionWriteBuffers { }; struct CollectionReadBuffers { + bool needsSchemaEvolution{false}; void* data{nullptr}; + void* data_oldschema{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h index 2546613d8..3b0fde4ba 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -54,7 +54,7 @@ class ROOTFrameWriter { // collectionID, collectionType, subsetCollection // NOTE: same as in rootUtils.h private header! - using CollectionInfoT = std::tuple; + using CollectionInfoT = std::tuple; /** * Helper struct to group together all necessary state to write / process a diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index 06f9c015b..b6fed99f1 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -91,7 +91,7 @@ class ROOTLegacyReader { private: std::pair getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index ba33cae16..03a5d5557 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -85,7 +85,7 @@ class ROOTReader : public IReader { std::map* readRunMetaData() override; private: - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); std::pair getLocalTreeAndEntry(const std::string& treename); // Information about the data vector as 
wall as the collection class type diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h new file mode 100644 index 000000000..fd77fddb6 --- /dev/null +++ b/include/podio/SchemaEvolution.h @@ -0,0 +1,14 @@ +#ifndef PODIO_SCHEMAEVOLUTION_H +#define PODIO_SCHEMAEVOLUTION_H + +#include + +namespace podio { + +enum class Backend { ROOT, SIO }; + +using SchemaVersionT = uint32_t; + +} // namespace podio + +#endif \ No newline at end of file diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 7d28e2c99..b3d910ed8 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -118,6 +118,12 @@ class UserDataCollection : public CollectionBase { }}; } + podio::CollectionReadBuffers createSchemaEvolvableBuffers(__attribute__((unused)) int readSchemaVersion, + __attribute__((unused)) + podio::Backend backend) /*const*/ final { + return createBuffers(); + } + /// check for validity of the container after read bool isValid() const override { return true; @@ -157,6 +163,11 @@ class UserDataCollection : public CollectionBase { void setSubsetCollection(bool) override { } + /// The schema version is fixed manually + SchemaVersionT getSchemaVersion() const final { + return 1; + } + /// Print this collection to the passed stream void print(std::ostream& os = std::cout, bool flush = true) const override { os << "["; diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 54547e833..43b706390 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,6 +4,7 @@ SET(podio_PYTHON_DIR ${CMAKE_CURRENT_LIST_DIR} PARENT_SCOPE) set(to_install podio_class_generator.py + podio_schema_evolution.py figure.txt EventStore.py) diff --git a/python/podio/generator_utils.py b/python/podio/generator_utils.py index e50b139e7..600d11ca8 100644 --- a/python/podio/generator_utils.py +++ b/python/podio/generator_utils.py @@ -71,9 +71,10 @@ def _is_fixed_width_type(type_name): class DataType: 
"""Simple class to hold information about a datatype or component that is defined in the datamodel.""" - def __init__(self, klass): + def __init__(self, klass, schema_version): self.full_type = klass self.namespace, self.bare_type = _get_namespace_class(self.full_type) + self.schema_version = schema_version def __str__(self): if self.namespace: @@ -195,8 +196,7 @@ def _to_json(self): class DataModel: # pylint: disable=too-few-public-methods """A class for holding a complete datamodel read from a configuration file""" - - def __init__(self, datatypes=None, components=None, options=None): + def __init__(self, datatypes=None, components=None, options=None, schema_version=None): self.options = options or { # should getters / setters be prefixed with get / set? "getSyntax": False, @@ -205,6 +205,7 @@ def __init__(self, datatypes=None, components=None, options=None): # use subfolder when including package header files "includeSubfolder": False, } + self.schema_version = schema_version self.components = components or {} self.datatypes = datatypes or {} diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index 3992a3aa9..79f43d1b2 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -409,6 +409,14 @@ def _read_datatype(cls, value): @classmethod def parse_model(cls, model_dict, package_name, upstream_edm=None): """Parse a model from the dictionary, e.g. read from a yaml file.""" + + if "schema_version" in model_dict: + schema_version = model_dict["schema_version"] + else: + warnings.warn("Please provide a schema_version entry. It will become mandatory. 
Setting it to 0 as default", + FutureWarning, stacklevel=3) + schema_version = 0 + components = {} if "components" in model_dict: for klassname, value in model_dict["components"].items(): @@ -432,7 +440,7 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None): # If this doesn't raise an exception everything should in principle work out validator = ClassDefinitionValidator() - datamodel = DataModel(datatypes, components, options) + datamodel = DataModel(datatypes, components, options, schema_version) validator.validate(datamodel, upstream_edm) return datamodel diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 44408c915..ad8877042 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -18,6 +18,7 @@ from podio.podio_config_reader import PodioConfigReader from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder +from podio_schema_evolution import DataModelComparator # dealing with cyclic imports THIS_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates') @@ -87,13 +88,20 @@ class IncludeFrom(IntEnum): class ClassGenerator: """The entry point for reading a datamodel definition and generating the necessary source code from it.""" - def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dryrun, upstream_edm): + def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dryrun, + upstream_edm, old_description, evolution_file): self.install_dir = install_dir self.package_name = package_name self.io_handlers = io_handlers self.verbose = verbose self.dryrun = dryrun self.yamlfile = yamlfile + # schema evolution specific code + self.old_yamlfile = old_description + self.evolution_file = evolution_file + self.old_datamodel = None + self.old_datamodels_components = set() + self.old_datamodels_datatypes = set() try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, 
upstream_edm) @@ -110,6 +118,7 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr self.incfolder = self.datamodel.options['includeSubfolder'] self.expose_pod_members = self.datamodel.options["exposePODMembers"] self.upstream_edm = upstream_edm + self.schema_version = self.datamodel.schema_version self.clang_format = [] self.generated_files = [] @@ -127,9 +136,36 @@ def process(self): if 'ROOT' in self.io_handlers: self._create_selection_xml() - self.print_report() self._write_cmake_lists_file() + self.process_schema_evolution() + + self.print_report() + + def process_schema_evolution(self): + """Process the schema evolution""" + # have to make all necessary comparisons + # which are the ones that changed? + # have to extend the selection xml file + if self.old_yamlfile: + comparator = DataModelComparator(self.yamlfile, self.old_yamlfile, + evolution_file=self.evolution_file) + comparator.read() + comparator.compare() + + # some sanity checks + if len(comparator.errors) > 0: + print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \ +have unresolvable schema evolution incompatibilities:") + for error in comparator.errors: + print(error) + sys.exit(-1) + if len(comparator.warnings) > 0: + print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \ +have resolvable schema evolution incompatibilities:") + for warning in comparator.warnings: + print(warning) + sys.exit(-1) def print_report(self): """Print a summary report about the generated code""" @@ -228,7 +264,7 @@ def _process_component(self, name, component): includes.update(component.get("ExtraCode", {}).get("includes", "").split('\n')) component['includes'] = self._sort_includes(includes) - component['class'] = DataType(name) + component['class'] = DataType(name, self.schema_version) self._fill_templates('Component', component) @@ -375,7 +411,7 @@ def _preprocess_datatype(self, name, definition): # Make a copy here and add the preprocessing steps 
to that such that the # original definition can be left untouched data = deepcopy(definition) - data['class'] = DataType(name) + data['class'] = DataType(name, self.schema_version) data['includes_data'] = self._get_member_includes(definition["Members"]) self._preprocess_for_class(data) self._preprocess_for_obj(data) @@ -459,8 +495,10 @@ def _needs_include(self, classname) -> IncludeFrom: def _create_selection_xml(self): """Create the selection xml that is necessary for ROOT I/O""" - data = {'components': [DataType(c) for c in self.datamodel.components], - 'datatypes': [DataType(d) for d in self.datamodel.datatypes]} + data = {'components': [DataType(c, self.schema_version) for c in self.datamodel.components], + 'datatypes': [DataType(d, self.schema_version) for d in self.datamodel.datatypes], + 'old_schema_components': [DataType(d, self.schema_version) for d in + self.old_datamodels_datatypes | self.old_datamodels_components]} self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) def _build_include(self, member): @@ -539,6 +577,11 @@ def read_upstream_edm(name_path): ' EDM. Format is \':\'. 
' 'Note that only the code for the current EDM will be generated', default=None, type=read_upstream_edm) + parser.add_argument('--old-description', + help='Provide schema evolution relative to the old yaml file.', + default=None, action='store') + parser.add_argument('-e', '--evolution_file', help='yaml file clarifying schema evolutions', + default=None, action='store') args = parser.parse_args() @@ -551,7 +594,8 @@ def read_upstream_edm(name_path): os.makedirs(directory) gen = ClassGenerator(args.description, args.targetdir, args.packagename, args.iohandlers, - verbose=args.verbose, dryrun=args.dryrun, upstream_edm=args.upstream_edm) + verbose=args.verbose, dryrun=args.dryrun, upstream_edm=args.upstream_edm, + old_description=args.old_description, evolution_file=args.evolution_file) if args.clangformat: gen.clang_format = get_clang_format() gen.process() diff --git a/python/podio_schema_evolution.py b/python/podio_schema_evolution.py new file mode 100755 index 000000000..7bd5b676b --- /dev/null +++ b/python/podio_schema_evolution.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python +""" +Provides infrastructure for analyzing schema definitions for schema evolution +""" + +import yaml + +from podio.podio_config_reader import PodioConfigReader + + +# @TODO: not really a good class model here +# this is a remnant from previous more-sophisticated setups + + +class SchemaChange: + """The base class for all schema changes providing a brief description as representation""" + def __init__(self, description): + self.description = description + + def __str__(self) -> str: + return self.description + + def __repr__(self) -> str: + return self.description + + +class AddedComponent(SchemaChange): + """Class representing an added component""" + def __init__(self, component, name): + self.component = component + self.name = name + super().__init__(f"'{self.component.name}' has been added") + + +class DroppedComponent(SchemaChange): + """Class representing a dropped component""" + def 
__init__(self, component, name): + self.component = component + self.name = name + self.klassname = name + super().__init__(f"'{self.name}' has been dropped") + + +class AddedDatatype(SchemaChange): + """Class representing an added datatype""" + def __init__(self, datatype, name): + self.datatype = datatype + self.name = name + self.klassname = name + super().__init__(f"'{self.name}' has been added") + + +class DroppedDatatype(SchemaChange): + """Class representing a dropped datatype""" + def __init__(self, datatype, name): + self.datatype = datatype + self.name = name + self.klassname = name + super().__init__(f"'{self.name}' has been dropped") + + +class RenamedDataType(SchemaChange): + """Class representing a renamed datatype""" + def __init__(self, name_old, name_new): + self.name_old = name_old + self.name_new = name_new + super().__init__(f"'{self.name_new}': datatype '{self.name_old}' renamed to '{self.name_new}'.") + + +class AddedMember(SchemaChange): + """Class representing an added member""" + def __init__(self, member, definition_name): + self.member = member + self.definition_name = definition_name + self.klassname = definition_name + super().__init__(f"'{self.definition_name}' has an added member '{self.member.name}'") + + +class DroppedMember(SchemaChange): + """Class representing a dropped member""" + def __init__(self, member, definition_name): + self.member = member + self.definition_name = definition_name + self.klassname = definition_name + super().__init__(f"'{self.definition_name}' has a dropped member '{self.member.name}") + + +class ChangedMember(SchemaChange): + """Class representing a type change in a member""" + def __init__(self, name, member_name, old_member, new_member): + self.name = name + self.member_name = member_name + self.old_member = old_member + self.new_member = new_member + self.klassname = name + super().__init__(f"'{self.name}.{self.member_name}' changed type from '+\ + '{self.old_member.full_type} to 
{self.new_member.full_type}") + + +class RenamedMember(SchemaChange): + """Class representing a renamed member""" + def __init__(self, name, member_name_old, member_name_new): + self.name = name + self.member_name_old = member_name_old + self.member_name_new = member_name_new + self.klassname = name + super().__init__(f"'{self.name}': member '{self.member_name_old}' renamed to '{self.member_name_new}'.") + + +def sio_filter(schema_changes): + """ + Checks what is required/supported for the SIO backend + + At this point in time all schema changes have to be handled on PODIO side + + """ + return schema_changes + + +def root_filter(schema_changes): + """ + Checks what is required/supported for the ROOT backend + + At this point in time we are only interested in renames. + Everything else will be done by ROOT automatically + """ + relevant_schema_changes = [] + for schema_change in schema_changes: + if isinstance(schema_change, RenamedMember): + relevant_schema_changes.append(schema_change) + return relevant_schema_changes + + +class DataModelComparator: + """ + Compares two datamodels and extracts required schema evolution + """ + def __init__(self, yamlfile_new, yamlfile_old, evolution_file=None) -> None: + self.yamlfile_new = yamlfile_new + self.yamlfile_old = yamlfile_old + self.evolution_file = evolution_file + self.reader = PodioConfigReader() + + self.datamodel_new = None + self.datamodel_old = None + self.detected_schema_changes = [] + self.read_schema_changes = [] + self.schema_changes = [] + + self.warnings = [] + self.errors = [] + + def compare(self) -> None: + """execute the comparison on-preloaded datamodel definitions""" + self._compare_components() + self._compare_datatypes() + self.heuristics() + + def _compare_components(self) -> None: + """compare component definitions of old and new datamodel""" + # first check for dropped, added and kept components + added_components, dropped_components, kept_components = 
self._compare_keys(self.datamodel_new.components.keys(), + self.datamodel_old.components.keys()) + # Make findings known globally + self.detected_schema_changes.extend([AddedComponent(self.datamodel_new.components[name], name) + for name in added_components]) + self.detected_schema_changes.extend([DroppedComponent(self.datamodel_old.components[name], name) + for name in dropped_components]) + + self._compare_definitions(kept_components, self.datamodel_new.components, self.datamodel_old.components, "Members") + + def _compare_datatypes(self) -> None: + """compare datatype definitions of old and new datamodel""" + # first check for dropped, added and kept components + added_types, dropped_types, kept_types = self._compare_keys(self.datamodel_new.datatypes.keys(), + self.datamodel_old.datatypes.keys()) + # Make findings known globally + self.detected_schema_changes.extend([AddedDatatype(self.datamodel_new.datatypes[name], name) + for name in added_types]) + self.detected_schema_changes.extend([DroppedDatatype(self.datamodel_old.datatypes[name], name) + for name in dropped_types]) + + self._compare_definitions(kept_types, self.datamodel_new.datatypes, self.datamodel_old.datatypes, "Members") + + def _compare_definitions(self, definitions, first, second, category) -> None: + """compare member definitions in old and new datamodel""" + for name in definitions: + # we are only interested in members not the extracode + members1 = {member.name: member for member in first[name][category]} + members2 = {member.name: member for member in second[name][category]} + added_members, dropped_members, kept_members = self._compare_keys(members1.keys(), + members2.keys()) + # Make findings known globally + self.detected_schema_changes.extend([AddedMember(members1[member], name) for member in added_members]) + self.detected_schema_changes.extend([DroppedMember(members2[member], name) for member in dropped_members]) + + # now let's compare old and new for the kept members + for 
member_name in kept_members: + new = members1[member_name] + old = members2[member_name] + if old.full_type != new.full_type: + self.detected_schema_changes.append(ChangedMember(name, member_name, old, new)) + + @staticmethod + def _compare_keys(keys1, keys2): + """compare keys of two given dicts. return added, dropped and overlapping keys""" + added = set(keys1).difference(keys2) + dropped = set(keys2).difference(keys1) + kept = set(keys1).intersection(keys2) + return added, dropped, kept + + def get_changed_schemata(self, schema_filter=None): + """return the schemata which actually changed""" + if schema_filter: + schema_changes = schema_filter(self.schema_changes) + else: + schema_changes = self.schema_changes + changed_klasses = {} + for schema_change in schema_changes: + changed_klass = changed_klasses.setdefault(schema_change.klassname, []) + changed_klass.append(schema_change) + return changed_klasses + + def heuristics_members(self, added_members, dropped_members, schema_changes): + """make analysis of member changes in a given data type """ + for dropped_member in dropped_members: + added_members_in_definition = [member for member in added_members if + dropped_member.definition_name == member.definition_name] + for added_member in added_members_in_definition: + if added_member.member.full_type == dropped_member.member.full_type: + # this is a rename candidate. 
So let's see whether it has been explicitly declared by the user + is_rename = False + for schema_change in self.read_schema_changes: + if isinstance(schema_change, RenamedMember) and \ + (schema_change.name == dropped_member.definition_name) and \ + (schema_change.member_name_old == dropped_member.member.name) and \ + (schema_change.member_name_new == added_member.member.name): + # remove the dropping/adding from the schema changes and replace it by the rename + schema_changes.remove(dropped_member) + schema_changes.remove(added_member) + schema_changes.append(schema_change) + is_rename = True + if not is_rename: + self.warnings.append(f"Definition '{dropped_member.definition_name}' has a potential rename " + f"'{dropped_member.member.name}' -> '{added_member.member.name}' of type " + f"'{dropped_member.member.full_type}'.") + + def heuristics(self): + """make an analysis of the data model changes: + - check which can be auto-resolved + - check which need extra information from the user + - check which one are plain forbidden/impossible + """ + # let's analyse the changes in more detail + # make a copy that can be altered along the way + schema_changes = self.detected_schema_changes.copy() + # are there dropped/added member pairs that could be interpreted as rename? + dropped_members = [change for change in schema_changes if isinstance(change, DroppedMember)] + added_members = [change for change in schema_changes if isinstance(change, AddedMember)] + self.heuristics_members(added_members, dropped_members, schema_changes) + + # are the member changes actually supported/supportable? 
+ changed_members = [change for change in schema_changes if isinstance(change, ChangedMember)] + for change in changed_members: + # changes between arrays and basic types are forbidden + if change.old_member.is_array != change.new_member.is_array: + self.errors.append(f"Forbidden schema change in '{change.name}' for '{change.member_name}' from " + f"'{change.old_member.full_type}' to '{change.new_member.full_type}'") + + # are there dropped/added datatype pairs that could be interpreted as rename? + # for now assuming no change to the individual datatype definition + # I do not think more complicated heuristics are needed at this point in time + dropped_datatypes = [change for change in schema_changes if isinstance(change, DroppedDatatype)] + added_datatypes = [change for change in schema_changes if isinstance(change, AddedDatatype)] + + for dropped in dropped_datatypes: + dropped_members = {member.name: member for member in dropped.datatype["Members"]} + is_known_evolution = False + for added in added_datatypes: + added_members = {member.name: member for member in added.datatype["Members"]} + if set(dropped_members.keys()) == set(added_members.keys()): + for schema_change in self.read_schema_changes: + if isinstance(schema_change, RenamedDataType) and \ + (schema_change.name_old == dropped.name and schema_change.name_new == added.name): + schema_changes.remove(dropped) + schema_changes.remove(added) + schema_changes.append(schema_change) + is_known_evolution = True + if not is_known_evolution: + self.warnings.append(f"Potential rename of '{dropped.name}' into '{added.name}'.") + + # are there dropped/added component pairs that could be interpreted as rename? 
+ dropped_components = [change for change in schema_changes if isinstance(change, DroppedComponent)] + added_components = [change for change in schema_changes if isinstance(change, AddedComponent)] + + for dropped in dropped_components: + dropped_members = {member.name: member for member in dropped.component["Members"]} + for added in added_components: + added_members = {member.name: member for member in added.component["Members"]} + if set(dropped_members.keys()) == set(added_members.keys()): + self.warnings.append(f"Potential rename of '{dropped.name}' into '{added.name}'.") + + # make the results of the heuristics known to the instance + self.schema_changes = schema_changes + + def print_comparison(self): + """print the result of the datamodel comparison""" + print(f"Comparing datamodel versions {self.datamodel_new.schema_version} and {self.datamodel_old.schema_version}") + + print(f"Detected {len(self.schema_changes)} schema changes:") + for change in self.schema_changes: + print(f" - {change}") + + if len(self.warnings) > 0: + print("Warnings:") + for warning in self.warnings: + print(f" - {warning}") + + if len(self.errors) > 0: + print("ERRORS:") + for error in self.errors: + print(f" - {error}") + + def read(self) -> None: + """read datamodels from yaml files""" + self.datamodel_new = self.reader.read(self.yamlfile_new, package_name="new") + self.datamodel_old = self.reader.read(self.yamlfile_old, package_name="old") + if self.evolution_file: + self.read_evolution_file() + + def read_evolution_file(self) -> None: + """read and parse evolution file""" + supported_operations = ('member_rename', 'class_renamed_to') + with open(self.evolution_file, "r", encoding='utf-8') as stream: + content = yaml.load(stream, yaml.SafeLoader) + from_schema_version = content["from_schema_version"] + to_schema_version = content["to_schema_version"] + if (from_schema_version != self.datamodel_old.schema_version) or (to_schema_version != self.datamodel_new.schema_version): # 
nopep8 # noqa + raise BaseException("Versions in schema evolution file do not match versions in data model descriptions.") # nopep8 # noqa + + if "evolutions" in content: + for klassname, value in content["evolutions"].items(): + # now let's go through the various supported evolutions + for operation, details in value.items(): + if operation not in supported_operations: + raise BaseException(f'Schema evolution operation {operation} in {klassname} unknown or not supported') # nopep8 # noqa + if operation == 'member_rename': + schema_change = RenamedMember(klassname, details[0], details[1]) + self.read_schema_changes.append(schema_change) + elif operation == 'class_renamed_to': + schema_change = RenamedDataType(klassname, details) + self.read_schema_changes.append(schema_change) + + +########################## +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser(description='Given two yaml files this script analyzes ' + 'the difference of the two datamodels') + + parser.add_argument('new', help='yaml file describing the new datamodel') + parser.add_argument('old', help='yaml file describing the old datamodel') + parser.add_argument('-e', '--evo', help='yaml file clarifying schema evolutions', action='store') + args = parser.parse_args() + + comparator = DataModelComparator(args.new, args.old, evolution_file=args.evo) + comparator.read() + comparator.compare() + comparator.print_comparison() + print(comparator.get_changed_schemata(schema_filter=root_filter)) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 8c121de20..185b98971 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -175,6 +175,16 @@ podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { return readBuffers; } +podio::CollectionReadBuffers {{ collection_type }}::createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend /*backend*/) /*const*/ { + // no 
version difference -> no-op + if (readSchemaVersion == {{ class.schema_version }}) { + return createBuffers(); + } + // default is no-op as well + return createBuffers(); +} + + {% for member in Members %} {{ macros.vectorized_access(class, member) }} {% endfor %} diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 2c1a80e3b..f66a4fb89 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -82,6 +82,8 @@ public: std::string getValueTypeName() const final { return std::string("{{ (class | string ).strip(':') }}"); } /// fully qualified type name of stored POD elements - with namespace std::string getDataTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Data" }}"); } + /// schema version + unsigned int getSchemaVersion() const final { return {{ class.schema_version }}; }; bool isSubsetCollection() const final { return m_isSubsetColl; @@ -112,6 +114,11 @@ public: /// Create (empty) collection buffers from which a collection can be constructed podio::CollectionReadBuffers createBuffers() /*const*/ final; + /// Create (empty) collection buffers from which a collection can be constructed + /// Versioned to support schema evolution + podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend backend) /*const*/ final; + + void setID(unsigned ID) final { m_collectionID = ID; if (!m_isSubsetColl) { diff --git a/python/templates/schemaevolution/EvolvePOD.h.jinja2 b/python/templates/schemaevolution/EvolvePOD.h.jinja2 new file mode 100644 index 000000000..e69de29bb diff --git a/python/templates/selection.xml.jinja2 b/python/templates/selection.xml.jinja2 index 1d6f4e4f6..b0da9ff74 100644 --- a/python/templates/selection.xml.jinja2 +++ b/python/templates/selection.xml.jinja2 @@ -23,7 +23,12 @@ {# we have to declare them here, otherwise they cannot be easily imported #} {{ class_selection(class) }} {{ class_selection(class, 
postfix='Collection') }} +{% endfor %} + +{% for class in old_schema_components %} +{{ class_selection(class) }} {% endfor %} + diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index f8880133c..f4ba00bc4 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -264,7 +264,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, std::vector> storedClasses; storedClasses.reserve(collInfo.size()); - for (const auto& [collID, collType, isSubsetColl] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = idTable.name(collID); diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 3f552d69f..e6fa85de6 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -95,7 +95,8 @@ void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetTypeName(), coll->isSubsetCollection()); + catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name), coll->getTypeName(), coll->isSubsetCollection(), + coll->getSchemaVersion()); } // Also make branches for the parameters diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index f25c1b6bc..0a9380bd1 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -185,7 +185,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; - for (const auto& [collID, collType, isSubsetColl] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = m_table->name(collID); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index b2cb33100..781c1aea1 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -163,25 
+163,45 @@ void ROOTReader::openFiles(const std::vector& filenames) { podio::version::Version* versionPtr{nullptr}; if (auto* versionBranch = root_utils::getBranch(metadatatree, "PodioVersion")) { versionBranch->SetAddress(&versionPtr); + metadatatree->GetEntry(0); } + m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; + + // Read the collection type info + // For versions <0.13.1 it does not exist and has to be rebuilt from scratch + if (m_fileVersion < podio::version::Version{0, 13, 1}) { + + std::cout << "PODIO: Reconstructing CollectionTypeInfo branch from other sources in file: \'" + << m_chain->GetFile()->GetName() << "\'" << std::endl; + metadatatree->GetEntry(0); + const auto collectionInfo = root_utils::reconstructCollectionInfo(m_chain, *m_table); + createCollectionBranches(collectionInfo); - // Check if the CollectionTypeInfo branch is there and assume that the file - // has been written with with podio pre #197 (<0.13.1) if that is not the case - if (auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo")) { + } else if (m_fileVersion < podio::version::Version{0, 17, 0}) { + + auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo"); + auto collectionInfoWithoutSchema = new std::vector; auto collectionInfo = new std::vector; - collInfoBranch->SetAddress(&collectionInfo); + collInfoBranch->SetAddress(&collectionInfoWithoutSchema); metadatatree->GetEntry(0); + for (const auto& [collID, collType, isSubsetColl] : *collectionInfoWithoutSchema) { + collectionInfo->emplace_back(collID, collType, isSubsetColl, 0); + } createCollectionBranches(*collectionInfo); + delete collectionInfoWithoutSchema; delete collectionInfo; + } else { - std::cout << "PODIO: Reconstructing CollectionTypeInfo branch from other sources in file: \'" - << m_chain->GetFile()->GetName() << "\'" << std::endl; + + auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo"); + + auto 
collectionInfo = new std::vector; + collInfoBranch->SetAddress(&collectionInfo); metadatatree->GetEntry(0); - const auto collectionInfo = root_utils::reconstructCollectionInfo(m_chain, *m_table); - createCollectionBranches(collectionInfo); + createCollectionBranches(*collectionInfo); + delete collectionInfo; } - m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; delete versionPtr; } @@ -226,7 +246,7 @@ void ROOTReader::goToEvent(unsigned eventNumber) { void ROOTReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; - for (const auto& [collID, collType, isSubsetColl] : collInfo) { + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here const auto name = m_table->name(collID); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 46842b370..cf0a768e6 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -112,7 +112,7 @@ void ROOTWriter::finish() { m_store->get(name, coll); const auto collType = coll->getTypeName(); // const auto collType = "std::vector<" + coll->getDataTypeName() + ">"; - collectionInfo.emplace_back(collID, std::move(collType), coll->isSubsetCollection()); + collectionInfo.emplace_back(collID, std::move(collType), coll->isSubsetCollection(), coll->getSchemaVersion()); } m_metadatatree->Branch("CollectionTypeInfo", &collectionInfo); diff --git a/src/rootUtils.h b/src/rootUtils.h index 215c7fea6..2ad69389b 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -101,9 +101,11 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB } // A collection of additional information that describes the collection: the -// collectionID, the collection (data) type, and whether it is a subset -// collection -using CollectionInfoT = std::tuple; +// collectionID, the collection (data) type, whether it is a subset +// collection, and its schema version 
+using CollectionInfoT = std::tuple; +// for backwards compatibility +using CollectionInfoTWithoutSchema = std::tuple; inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) { // Read all data @@ -140,8 +142,8 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable std::string_view dataClass = bufferClassName; dataClass.remove_suffix(5); const auto collClass = std::string(dataClass.substr(7)) + "Collection"; - // Assume that there are no subset collections in "old files" - collInfo.emplace_back(collID, std::move(collClass), false); + // Assume that there are no subset collections in "old files" and schema is 0 + collInfo.emplace_back(collID, std::move(collClass), false, 0); } else { std::cerr << "Problems reconstructing collection info for collection: \'" << name << "\'\n"; } diff --git a/src/selection.xml b/src/selection.xml index d198bfab6..92ca68764 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -14,6 +14,8 @@ + + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2b056bc74..c3944d4ea 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,10 @@ foreach( _conf ${CMAKE_CONFIGURATION_TYPES} ) endforeach() PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources - IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS}) + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} + OLD_DESCRIPTION datalayout_old.yaml + SCHEMA_EVOLUTION schema_evolution.yaml + ) # Use the cmake building blocks to add the different parts (conditionally) PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel "${headers}" "${sources}") diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 4028ec7d1..2bcf7cb11 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,4 +1,5 @@ --- +schema_version : 1 options : # should getters / setters be prefixed with get / set? 
diff --git a/tests/datalayout_old.yaml b/tests/datalayout_old.yaml new file mode 100755 index 000000000..81a9d5707 --- /dev/null +++ b/tests/datalayout_old.yaml @@ -0,0 +1,203 @@ +--- +schema_version : 0 + +options : + # should getters / setters be prefixed with get / set? + getSyntax: False + # should POD members be exposed with getters/setters in classes that have them as members? + exposePODMembers: True + includeSubfolder: True + +components : + + ToBeDroppedStruct: + Members: + - int x + + SimpleStruct: + Members: + - int x + - int z + - std::array p + # can also add c'tors: + ExtraCode : + declaration: " + SimpleStruct() : x(0),y(0),z(0) {}\n + SimpleStruct( const int* v) : x(v[0]),y(v[1]),z(v[2]) {} + " + + NotSoSimpleStruct: + Members: + - SimpleStruct data // component members can have descriptions + + ex2::NamespaceStruct: + Members: + - int x + - int y_old + + ex2::NamespaceInNamespaceStruct: + Members: + - ex2::NamespaceStruct data + + StructWithFixedWithTypes: + Members: + - uint16_t fixedUnsigned16 // unsigned int with exactly 16 bits + - std::int64_t fixedInteger64 // int with exactly 64 bits + - int32_t fixedInteger32 // int with exactly 32 bits + + CompWithInit: + Members: + - int i{42} // is there even another value to initialize ints to? + - std::array arr {1.2, 3.4} // half initialized double array + +datatypes : + + EventInfoOldName: + Description : "Event info" + Author : "B. Hegner" + Members : + - int Number // event number + MutableExtraCode : + declaration: "void setNumber(int n) { Number( n ) ; } " + ExtraCode: + declaration: "int getNumber() const;" + implementation: "int {name}::getNumber() const { return Number(); }" + + ExampleHit : + Description : "Example Hit" + Author : "B. 
Hegner" + Members: + - unsigned long long cellID // cellID + - double x // x-coordinate + - double y // y-coordinate + - double z // z-coordinate + - double energy // measured energy deposit + + ExampleMC : + Description : "Example MC-particle" + Author: "F.Gaede" + Members: + - double energy // energy + - int PDG // PDG code + OneToManyRelations: + - ExampleMC parents // parents + - ExampleMC daughters // daughters + + ExampleCluster : + Description : "Cluster" + Author : "B. Hegner" + Members: + - double energy // cluster energy + OneToManyRelations: + - ExampleHit Hits // hits contained in the cluster + - ExampleCluster Clusters // sub clusters used to create this cluster + + ExampleReferencingType : + Description : "Referencing Type" + Author : "B. Hegner" + OneToManyRelations: + - ExampleCluster Clusters // some refs to Clusters + - ExampleReferencingType Refs // refs into same type + + ExampleWithVectorMember : + Description : "Type with a vector member" + Author : "B. Hegner" + VectorMembers: + - int count // various ADC counts + + ExampleWithOneRelation : + Description : "Type with one relation member" + Author : "Benedikt Hegner" + OneToOneRelations: + - ExampleCluster cluster // a particular cluster + + ExampleWithArrayComponent: + Description: "A type that has a component with an array" + Author: "Thomas Madlener" + Members: + - SimpleStruct s // a component that has an array + + ExampleWithComponent : + Description : "Type with one component" + Author : "Benedikt Hegner" + Members : + - NotSoSimpleStruct component // a component + + ExampleForCyclicDependency1 : + Description : "Type for cyclic dependency" + Author : "Benedikt Hegner" + OneToOneRelations : + - ExampleForCyclicDependency2 ref // a ref + + ExampleForCyclicDependency2 : + Description : "Type for cyclic dependency" + Author : "Benedikt Hegner" + OneToOneRelations : + - ExampleForCyclicDependency1 ref // a ref + +# ExampleWithArray : +# Description : "Type with an array" +# Author : 
"Benedikt Hegner" +# Members: +# - std::array array // the array + + ex42::ExampleWithNamespace : + Description : "Type with namespace and namespaced member" + Author : "Joschka Lingemann" + Members: + - ex2::NamespaceStruct component // a component + + ex42::ExampleWithARelation : + Description : "Type with namespace and namespaced relation" + Author : "Joschka Lingemann" + Members: + - float number // just a number + OneToOneRelations : + - ex42::ExampleWithNamespace ref // a ref in a namespace + OneToManyRelations : + - ex42::ExampleWithNamespace refs // multiple refs in a namespace + + ExampleWithDifferentNamespaceRelations: + Description: "Datatype using a namespaced relation without being in the same namespace" + Author: "Thomas Madlener" + OneToOneRelations: + - ex42::ExampleWithNamespace rel // a relation in a different namespace + OneToManyRelations: + - ex42::ExampleWithNamespace rels // relations in a different namespace + + ExampleWithArray: + Description: "Datatype with an array member" + Author: "Joschka Lingemann" + Members: + - NotSoSimpleStruct arrayStruct // component that contains an array + - std::array myArray // array-member without space to test regex + - std::array anotherArray2 // array-member with space to test regex + - std::array snail_case_array // snail case to test regex + - std::array snail_case_Array3 // mixing things up for regex + - std::array structArray // an array containing structs + + ExampleWithFixedWidthIntegers: + Description: "Datatype using fixed width integer types as members" + Author: "Thomas Madlener" + Members: + - std::int16_t fixedI16 // some integer with exactly 16 bits + - uint64_t fixedU64 // unsigned int with exactly 64 bits + - uint32_t fixedU32 // unsigned int with exactly 32 bits + - StructWithFixedWithTypes fixedWidthStruct // struct with more fixed width types + - std::array fixedWidthArray // 32 bits split into two times 16 bits + + ExampleWithUserInit: + Description: "Datatype with user defined 
initialization values" + Author: "Thomas Madlener" + Members: + - std::int16_t i16Val{42} // some int16 value + - std::array floats {3.14f, 1.23f} // some float values + - ex2::NamespaceStruct s{10, 11} // one that we happen to know works + - double d{9.876e5} // double val + - CompWithInit comp // To make sure that the default initializer of the component does what it should + + ExampleOfDroppedType: + Description: "Datatype with user defined initialization values" + Author: "Thomas Madlener" + Members: + - int x // some member \ No newline at end of file diff --git a/tests/schema_evolution.yaml b/tests/schema_evolution.yaml new file mode 100644 index 000000000..8a9e925f8 --- /dev/null +++ b/tests/schema_evolution.yaml @@ -0,0 +1,13 @@ +--- +from_schema_version : 0 +to_schema_version : 1 + +evolutions: + + ex2::NamespaceStruct: + member_rename: + - y_old + - y + + EventInfoOldName: + class_renamed_to: EventInfo From 9cd03731dfe4b4c37eac34e6364411bb55be940a Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 3 Apr 2023 09:06:58 +0200 Subject: [PATCH 052/100] Do not mark generated files as GENERATED (#397) Keep them in place even if the clean target is specified. 
See #396 --- python/podio_class_generator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index ad8877042..ccfaab8ab 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -464,7 +464,6 @@ def _write_list(name, target_folder, files, comment): list_cont.append(f' {os.path.join(target_folder, fname)}') list_cont.append(')') - list_cont.append(f'SET_PROPERTY(SOURCE ${{{name}}} PROPERTY GENERATED TRUE)\n') return '\n'.join(list_cont) From 09d17d49f434e23663137eadacfea4eaa3d58d48 Mon Sep 17 00:00:00 2001 From: Paul Gessinger Date: Mon, 3 Apr 2023 14:45:57 +0200 Subject: [PATCH 053/100] Do not reject build if ROOT is built with C++20 (#398) Co-authored-by: Paul Gessinger-Befurt --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e8a943b61..c4f3a96ac 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,8 +77,8 @@ find_package(ROOT REQUIRED COMPONENTS RIO Tree) # Check that root is compiled with a modern enough c++ standard get_target_property(ROOT_COMPILE_FEATURES ROOT::Core INTERFACE_COMPILE_FEATURES) -if (NOT "cxx_std_17" IN_LIST ROOT_COMPILE_FEATURES) - message(FATAL_ERROR "You are trying to build podio against a version of ROOT that has not been built with a sufficient c++ standard. podio requires c++17") +if (NOT "cxx_std_17" IN_LIST ROOT_COMPILE_FEATURES AND NOT "cxx_std_20" IN_LIST ROOT_COMPILE_FEATURES) + message(FATAL_ERROR "You are trying to build podio against a version of ROOT that has not been built with a sufficient c++ standard. 
podio requires c++17 or higher") endif() #Check if Python version detected matches the version used to build ROOT SET(Python_FIND_FRAMEWORK LAST) From d71e4c0392cca73a5fe4e83b333b13840d1d09cd Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 5 Apr 2023 13:36:10 +0200 Subject: [PATCH 054/100] Define PODIO_ENABLE_SIO for targets depending on podioSioIO (#399) --- src/CMakeLists.txt | 2 +- tests/CMakeLists.txt | 11 +++++------ tests/unittest.cpp | 8 ++++++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 589c073a6..ec7d4cfce 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -125,10 +125,10 @@ if(ENABLE_SIO) PODIO_ADD_LIB_AND_DICT(podioSioIO "${sio_headers}" "${sio_sources}" sio_selection.xml) target_link_libraries(podioSioIO PUBLIC podio::podio SIO::sio ${CMAKE_DL_LIBS} ${PODIO_FS_LIBS}) + target_compile_definitions(podioSioIO PUBLIC PODIO_ENABLE_SIO=1) # Make sure the legacy python bindings know about the SIO backend target_link_libraries(podioPythonStore PRIVATE podioSioIO) - target_compile_definitions(podioPythonStore PRIVATE PODIO_ENABLE_SIO=1) LIST(APPEND INSTALL_LIBRARIES podioSioIO podioSioIODict) endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index c3944d4ea..1d0a440bd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -44,7 +44,7 @@ function(CREATE_PODIO_TEST sourcefile additional_libs) add_executable( ${name} ${sourcefile} ) add_test(NAME ${name} COMMAND ${name}) - target_link_libraries(${name} TestDataModel ExtensionDataModel ${additional_libs}) + target_link_libraries(${name} PRIVATE TestDataModel ExtensionDataModel ${additional_libs}) set_property(TEST ${name} PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts @@ -96,7 +96,7 @@ endif() if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) message(STATUS "Using test inputs 
stored in: " ${PODIO_TEST_INPUT_DATA_DIR}) add_executable(read-legacy-files read-legacy-files.cpp) - target_link_libraries(read-legacy-files TestDataModel TestDataModelDict podio::podioRootIO) + target_link_libraries(read-legacy-files PRIVATE TestDataModel TestDataModelDict podio::podioRootIO) add_test(NAME read-legacy-files COMMAND read-legacy-files ${PODIO_TEST_INPUT_DATA_DIR}/example.root) set_property(TEST read-legacy-files PROPERTY ENVIRONMENT @@ -157,8 +157,8 @@ if (TARGET TestDataModelSioBlocks) # These need to be linked against TTree explicitly, since it is not done # through another library and the TimedReader/Writer decorators are # header-only wrappers - target_link_libraries(write_timed_sio ROOT::Tree) - target_link_libraries(read_timed_sio ROOT::Tree) + target_link_libraries(write_timed_sio PRIVATE ROOT::Tree) + target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) endif() #--- set some dependencies between the different tests to ensure input generating ones are run first @@ -170,7 +170,7 @@ set_property(TEST read_timed PROPERTY DEPENDS write_timed) set_property(TEST read_frame PROPERTY DEPENDS write_frame_root) add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) -target_link_libraries(check_benchmark_outputs ROOT::Tree) +target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree) add_test(NAME check_benchmark_outputs COMMAND check_benchmark_outputs write_benchmark_root.root read_benchmark_root.root) set_property(TEST check_benchmark_outputs PROPERTY DEPENDS read_timed write_timed) @@ -207,7 +207,6 @@ add_executable(unittest unittest.cpp frame.cpp) target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) if (ENABLE_SIO) target_link_libraries(unittest PRIVATE podio::podioSioIO) - target_compile_definitions(unittest PRIVATE PODIO_WITH_SIO) endif() # The unittests are a bit better and they are labelled so we can put together a diff --git a/tests/unittest.cpp 
b/tests/unittest.cpp index e16b2eb9c..81090e145 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -15,7 +15,11 @@ #include "podio/ROOTLegacyReader.h" #include "podio/ROOTReader.h" #include "podio/podioVersion.h" -#ifdef PODIO_WITH_SIO + +#ifndef PODIO_ENABLE_SIO + #define PODIO_ENABLE_SIO 0 +#endif +#if PODIO_ENABLE_SIO #include "podio/SIOFrameReader.h" #include "podio/SIOLegacyReader.h" #include "podio/SIOReader.h" @@ -1061,7 +1065,7 @@ TEST_CASE("Missing files (ROOT readers)", "[basics]") { REQUIRE_THROWS_AS(root_frame_reader.openFile("NonExistentFile.root"), std::runtime_error); } -#ifdef PODIO_WITH_SIO +#if PODIO_ENABLE_SIO TEST_CASE("Missing files (SIO readers)", "[basics]") { auto sio_reader = podio::SIOReader(); REQUIRE_THROWS_AS(sio_reader.openFile("NonExistentFile.sio"), std::runtime_error); From f4c9219ddde59b1efc73db3fd9c25139222a7b3c Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 12 Apr 2023 10:04:21 +0200 Subject: [PATCH 055/100] Make SIOFrameData work properly with dropped collections (#400) --- src/SIOFrameData.cc | 11 ++++-- tests/read_frame.cpp | 8 +++-- tests/read_frame_auxiliary.h | 65 ++++++++++++++++++++++++++++++++++++ tests/read_frame_sio.cpp | 8 +++-- 4 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 tests/read_frame_auxiliary.h diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index e7c87e9e6..1d2b08b39 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -19,7 +19,8 @@ std::optional SIOFrameData::getCollectionBuffers(c // collection indices start at 1! 
const auto index = std::distance(std::begin(names), nameIt) + 1; - m_availableBlocks[index] = 1; + // Mark this block as consumed + m_availableBlocks[index] = 0; return {dynamic_cast(m_blocks[index].get())->getBuffers()}; } @@ -35,9 +36,13 @@ std::unique_ptr SIOFrameData::getParameters() { std::vector SIOFrameData::getAvailableCollections() { unpackBuffers(); std::vector collections; - for (size_t i = 0; i < m_blocks.size(); ++i) { + for (size_t i = 1; i < m_blocks.size(); ++i) { if (m_availableBlocks[i]) { - collections.push_back(m_idTable.name(i)); + // We have to get the collID of this collection in the idTable as there is + // no guarantee that it coincides with the index in the blocks. + // Additionally, collection indices start at 1 + const auto collID = m_idTable.ids()[i - 1]; + collections.push_back(m_idTable.name(collID)); } } diff --git a/tests/read_frame.cpp b/tests/read_frame.cpp index 5a0929627..2b25b4cf6 100644 --- a/tests/read_frame.cpp +++ b/tests/read_frame.cpp @@ -1,7 +1,9 @@ -#include "podio/ROOTFrameReader.h" - #include "read_frame.h" +#include "read_frame_auxiliary.h" + +#include "podio/ROOTFrameReader.h" int main() { - return read_frames("example_frame.root"); + return read_frames("example_frame.root") + + test_frame_aux_info("example_frame.root"); } diff --git a/tests/read_frame_auxiliary.h b/tests/read_frame_auxiliary.h new file mode 100644 index 000000000..0546d6cd0 --- /dev/null +++ b/tests/read_frame_auxiliary.h @@ -0,0 +1,65 @@ +#ifndef PODIO_TESTS_READ_FRAME_AUXILIARY_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_READ_FRAME_AUXILIARY_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "write_frame.h" + +#include "podio/Frame.h" + +#include +#include +#include + +bool present(const std::string& elem, const std::vector& vec) { + return std::find(vec.begin(), vec.end(), elem) != vec.end(); +} + +int testGetAvailableCollections(const podio::Frame& frame, const std::vector& expected) 
{ + const auto& collNames = frame.getAvailableCollections(); + int result = 0; + for (const auto& name : expected) { + if (!present(name, collNames)) { + std::cerr << "Cannot find expected collection " << name << " in collections of Frame" << std::endl; + result = 1; + } + } + + // Get a few collections and make sure that the results are unchanged (apart + // from ordering) + frame.get("hitRefs"); + frame.get("mcparticles"); + + const auto& newCollNames = frame.getAvailableCollections(); + for (const auto& name : newCollNames) { + if (!present(name, collNames)) { + std::cerr << "getAvailableCollections returns different collections after getting collections" << std::endl; + return 1; + } + } + + return result; +} + +/** + * Test function for testing some auxiliary functionality of the Frame. + * Encapsulates everything, such that a corresponding main function boils down + * to including the reader to test and defining a main that invokes and returns + * this function. + * + * @param fileName the name of the file to read from + * @tparam ReaderT a Frame based I/O capable reader + * @return 0 if all checks pass, non-zero otherwise + * */ +template +int test_frame_aux_info(const std::string& fileName) { + auto reader = ReaderT{}; + reader.openFile(fileName); + + // Test on the first event only here.
Additionally, also only testing the + // "events" category, since that is the one where not all collections are + // written + auto event = podio::Frame(reader.readEntry("events", 0)); + + return testGetAvailableCollections(event, collsToWrite); +} + +#endif // PODIO_TESTS_READ_FRAME_AUXILIARY_H diff --git a/tests/read_frame_sio.cpp b/tests/read_frame_sio.cpp index cdc0b8854..5f69d4da1 100644 --- a/tests/read_frame_sio.cpp +++ b/tests/read_frame_sio.cpp @@ -1,7 +1,9 @@ -#include "podio/SIOFrameReader.h" - #include "read_frame.h" +#include "read_frame_auxiliary.h" + +#include "podio/SIOFrameReader.h" int main() { - return read_frames("example_frame.sio"); + return read_frames("example_frame.sio") + + test_frame_aux_info("example_frame.sio"); } From ffc34c1f1fb265e731f95090050e01ddd6a4b59f Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Fri, 21 Apr 2023 09:53:42 +0200 Subject: [PATCH 056/100] Add an argument to exclude SIO files from some tests (#387) * Actually check extension model dumping * Rework script and CMakeLists to pass in more information in environment * Also check when clang format is used --------- Co-authored-by: jmcarcell Co-authored-by: Thomas Madlener --- tests/CMakeLists.txt | 36 +++++++++++++++++++++++------ tests/scripts/dumpModelRoundTrip.sh | 26 +++++++++++++-------- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1d0a440bd..92263ea8e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -250,9 +250,19 @@ endif() # Add tests for storing and retrieving the EDM definitions into the produced # files -add_test(datamodel_def_store_roundtrip_root ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root datamodel) -add_test(datamodel_def_store_roundtrip_root_extension ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root 
datamodel extension_datamodel) - +add_test(datamodel_def_store_roundtrip_root ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh + ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root + datamodel + ${CMAKE_CURRENT_LIST_DIR} + ) +# The extension model needs to know about the upstream model for generation +add_test(datamodel_def_store_roundtrip_root_extension + ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh + ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root + extension_model + ${CMAKE_CURRENT_LIST_DIR}/extension_model + --upstream-edm=datamodel:${CMAKE_CURRENT_LIST_DIR}/datalayout.yaml + ) # Need the input files that are produced by other tests set_tests_properties( @@ -263,9 +273,21 @@ set_tests_properties( ) set(sio_roundtrip_tests "") -if (TARGET read_sio) - add_test(datamodel_def_store_roundtrip_sio ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio datamodel) - add_test(datamodel_def_store_roundtrip_sio_extension ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio datamodel extension_datamodel) +if (ENABLE_SIO) + add_test(datamodel_def_store_roundtrip_sio + ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh + ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio + datamodel + ${CMAKE_CURRENT_LIST_DIR} + ) + # The extension model needs to know about the upstream model for generation + add_test(datamodel_def_store_roundtrip_sio_extension + ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh + ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio + extension_model + ${CMAKE_CURRENT_LIST_DIR}/extension_model + --upstream-edm=datamodel:${CMAKE_CURRENT_LIST_DIR}/datalayout.yaml + ) set(sio_roundtrip_tests datamodel_def_store_roundtrip_sio @@ -289,5 +311,5 @@ set_tests_properties( PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ENVIRONMENT - 
"PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH}" + "PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};ENABLE_SIO=${ENABLE_SIO};PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH}" ) diff --git a/tests/scripts/dumpModelRoundTrip.sh b/tests/scripts/dumpModelRoundTrip.sh index 9f9bc2148..077278a35 100755 --- a/tests/scripts/dumpModelRoundTrip.sh +++ b/tests/scripts/dumpModelRoundTrip.sh @@ -3,13 +3,16 @@ # the original definition. Essentially does not check that the YAML file is the # same, but rather that the generated code is the same -set -eu +set -eux INPUT_FILE=${1} # the datafile EDM_NAME=${2} # the name of the EDM -COMP_BASE_FOLDER="" # where the source to compare against is -if [ -$# -gt 2 ]; then - COMP_BASE_FOLDER=${3} +COMP_BASE_FOLDER=${3} # where the source to compare against is +shift 3 +EXTRA_GEN_ARGS=${@} + +if [ ${PODIO_USE_CLANG_FORMAT} = "ON" ] || [ ${PODIO_USE_CLANG_FORMAT} = "AUTO" ]; then + EXTRA_GEN_ARGS="${EXTRA_GEN_ARGS} --clangformat" fi # Create a few temporary but unique files and directories to store output @@ -20,9 +23,9 @@ mkdir -p ${OUTPUT_FOLDER} # Dump the model to a yaml file ${PODIO_BASE}/tools/podio-dump --dump-edm ${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} -# Regenerate the code via the class generator and the freshly dumped modl +# Regenerate the code via the class generator and the freshly dumped model ${PODIO_BASE}/python/podio_class_generator.py \ - --clangformat \ + ${EXTRA_GEN_ARGS} \ ${DUMPED_MODEL} \ 
${OUTPUT_FOLDER} \ ${EDM_NAME} \ @@ -31,6 +34,11 @@ ${PODIO_BASE}/python/podio_class_generator.py \ # Compare to the originally generated code, that has been used to write the data # file. Need to diff subfolders explitly here because $PODIO_BASE/tests contains # more stuff -diff -ru ${OUTPUT_FOLDER}/${EDM_NAME} ${PODIO_BASE}/tests/${COMP_BASE_FOLDER}/${EDM_NAME} -diff -ru ${OUTPUT_FOLDER}/src ${PODIO_BASE}/tests/${COMP_BASE_FOLDER}/src -diff -u ${OUTPUT_FOLDER}/podio_generated_files.cmake ${PODIO_BASE}/tests/podio_generated_files.cmake +DIFF_EXTRA_ARGS="" +if [ ${ENABLE_SIO} = "OFF" ]; then + DIFF_EXTRA_ARGS=--exclude='*SIO*' +fi + +diff -ru ${OUTPUT_FOLDER}/${EDM_NAME} ${COMP_BASE_FOLDER}/${EDM_NAME} ${DIFF_EXTRA_ARGS} +diff -ru ${OUTPUT_FOLDER}/src ${COMP_BASE_FOLDER}/src ${DIFF_EXTRA_ARGS} +diff -u ${OUTPUT_FOLDER}/podio_generated_files.cmake ${COMP_BASE_FOLDER}/podio_generated_files.cmake From 705721d53bcdd68c265351b86d4973c5bca3c0f9 Mon Sep 17 00:00:00 2001 From: Christopher Dilks Date: Sat, 22 Apr 2023 15:40:23 -0400 Subject: [PATCH 057/100] fix: use type `std::size_t` for index in `Collection` element accessors and `size()` (#408) * fix: use type `size_t` for index in `Collection` element accessors * fix: `unsigned int` -> `size_t` in `Collection.cc.jinja2` * fix: use `size_t` in `macro multi_relation_handling` * fix: `size_t` -> `std::size_t` * fix: add `#include ` to `{,Mutable}Object.h.jinja2` --- python/templates/Collection.cc.jinja2 | 10 +++++----- python/templates/Collection.h.jinja2 | 11 ++++++----- python/templates/MutableObject.h.jinja2 | 1 + python/templates/Object.h.jinja2 | 1 + python/templates/macros/declarations.jinja2 | 4 ++-- python/templates/macros/implementations.jinja2 | 4 ++-- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 185b98971..f7a5bcfd8 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 
@@ -28,23 +28,23 @@ m_storage.clear(m_isSubsetColl); } -{{ class.bare_type }} {{ collection_type }}::operator[](unsigned int index) const { +{{ class.bare_type }} {{ collection_type }}::operator[](std::size_t index) const { return {{ class.bare_type }}(m_storage.entries[index]); } -{{ class.bare_type }} {{ collection_type }}::at(unsigned int index) const { +{{ class.bare_type }} {{ collection_type }}::at(std::size_t index) const { return {{ class.bare_type }}(m_storage.entries.at(index)); } -Mutable{{ class.bare_type }} {{ collection_type }}::operator[](unsigned int index) { +Mutable{{ class.bare_type }} {{ collection_type }}::operator[](std::size_t index) { return Mutable{{ class.bare_type }}(m_storage.entries[index]); } -Mutable{{ class.bare_type }} {{ collection_type }}::at(unsigned int index) { +Mutable{{ class.bare_type }} {{ collection_type }}::at(std::size_t index) { return Mutable{{ class.bare_type }}(m_storage.entries.at(index)); } -size_t {{ collection_type }}::size() const { +std::size_t {{ collection_type }}::size() const { return m_storage.entries.size(); } diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index f66a4fb89..6c6939456 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -30,6 +30,7 @@ #include #include #include +#include {{ utils.namespace_open(class.namespace) }} @@ -74,7 +75,7 @@ public: Mutable{{ class.bare_type }} create(Args&&... 
args); /// number of elements in the collection - size_t size() const final; + std::size_t size() const final; /// fully qualified type name std::string getTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Collection" }}"); } @@ -92,13 +93,13 @@ public: void setSubsetCollection(bool setSubset=true) final; /// Returns the const object of given index - {{ class.bare_type }} operator[](unsigned int index) const; + {{ class.bare_type }} operator[](std::size_t index) const; /// Returns the object of a given index - Mutable{{ class.bare_type }} operator[](unsigned int index); + Mutable{{ class.bare_type }} operator[](std::size_t index); /// Returns the const object of given index - {{ class.bare_type }} at(unsigned int index) const; + {{ class.bare_type }} at(std::size_t index) const; /// Returns the object of given index - Mutable{{ class.bare_type }} at(unsigned int index); + Mutable{{ class.bare_type }} at(std::size_t index); /// Append object to the collection diff --git a/python/templates/MutableObject.h.jinja2 b/python/templates/MutableObject.h.jinja2 index 8d5fa8bf3..ab39778a0 100644 --- a/python/templates/MutableObject.h.jinja2 +++ b/python/templates/MutableObject.h.jinja2 @@ -14,6 +14,7 @@ {% endfor %} #include "podio/ObjectID.h" #include +#include #ifdef PODIO_JSON_OUTPUT #include "nlohmann/json.hpp" diff --git a/python/templates/Object.h.jinja2 b/python/templates/Object.h.jinja2 index 05303cb14..0a6de9296 100644 --- a/python/templates/Object.h.jinja2 +++ b/python/templates/Object.h.jinja2 @@ -12,6 +12,7 @@ {% endfor %} #include "podio/ObjectID.h" #include +#include #ifdef PODIO_JSON_OUTPUT #include "nlohmann/json.hpp" diff --git a/python/templates/macros/declarations.jinja2 b/python/templates/macros/declarations.jinja2 index b5d150d5c..4ee9522f4 100644 --- a/python/templates/macros/declarations.jinja2 +++ b/python/templates/macros/declarations.jinja2 @@ -125,8 +125,8 @@ {% if with_adder %} void {{ relation.setter_name(get_syntax, 
is_relation=True) }}({{ relation.full_type }}); {% endif %} - unsigned int {{ relation.name }}_size() const; - {{ relation.full_type }} {{ relation.getter_name(get_syntax) }}(unsigned int) const; + std::size_t {{ relation.name }}_size() const; + {{ relation.full_type }} {{ relation.getter_name(get_syntax) }}(std::size_t) const; std::vector<{{ relation.full_type }}>::const_iterator {{ relation.name }}_begin() const; std::vector<{{ relation.full_type }}>::const_iterator {{ relation.name }}_end() const; podio::RelationRange<{{ relation.full_type }}> {{ relation.getter_name(get_syntax) }}() const; diff --git a/python/templates/macros/implementations.jinja2 b/python/templates/macros/implementations.jinja2 index 8924aa96c..821056fba 100644 --- a/python/templates/macros/implementations.jinja2 +++ b/python/templates/macros/implementations.jinja2 @@ -128,11 +128,11 @@ std::vector<{{ relation.full_type }}>::const_iterator {{ class_type }}::{{ relat return ret_value; } -unsigned int {{ class_type }}::{{ relation.name }}_size() const { +std::size_t {{ class_type }}::{{ relation.name }}_size() const { return m_obj->data.{{ relation.name }}_end - m_obj->data.{{ relation.name }}_begin; } -{{ relation.full_type }} {{ class_type }}::{{ relation.getter_name(get_syntax) }}(unsigned int index) const { +{{ relation.full_type }} {{ class_type }}::{{ relation.getter_name(get_syntax) }}(std::size_t index) const { if ({{ relation.name }}_size() > index) { return m_obj->m_{{ relation.name }}->at(m_obj->data.{{ relation.name }}_begin + index); } From d294ae658a3795cc0db3923faa76b35b0d9c80a9 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 9 May 2023 15:42:16 +0200 Subject: [PATCH 058/100] Decouple the collection buffer creation from the collection (#394) * Add schema version to datamodel meta information * Use meta namespace to get schema version info * Make sure to have non-zero positive schema versions * Remove unnecessary attributes * Add basic buffer factory and switch over 
UserDataCollections * Add documentation --- include/podio/CollectionBase.h | 9 -- include/podio/CollectionBufferFactory.h | 84 +++++++++++++++++++ include/podio/ROOTFrameReader.h | 7 +- include/podio/ROOTLegacyReader.h | 8 +- include/podio/SIOBlock.h | 7 +- include/podio/SIOBlockUserData.h | 26 +++--- include/podio/UserDataCollection.h | 34 ++++---- python/podio/generator_utils.py | 3 +- python/podio/podio_config_reader.py | 6 +- python/podio_class_generator.py | 12 +-- python/templates/Collection.cc.jinja2 | 76 ++++++++++++----- python/templates/Collection.h.jinja2 | 10 +-- python/templates/DatamodelDefinition.h.jinja2 | 6 ++ python/templates/SIOBlock.cc.jinja2 | 36 ++++---- python/templates/SIOBlock.h.jinja2 | 12 +-- src/CMakeLists.txt | 2 + src/CollectionBufferFactory.cc | 59 +++++++++++++ src/ROOTFrameReader.cc | 42 ++-------- src/ROOTLegacyReader.cc | 41 ++------- src/SIOBlock.cc | 2 +- src/UserDataCollection.cc | 54 ++++++++++++ tests/datalayout.yaml | 2 +- tests/datalayout_old.yaml | 4 +- tests/schema_evolution.yaml | 4 +- 24 files changed, 348 insertions(+), 198 deletions(-) create mode 100644 include/podio/CollectionBufferFactory.h create mode 100644 src/CollectionBufferFactory.cc create mode 100644 src/UserDataCollection.cc diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index d2dc1a626..7e2a51b2c 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -47,15 +47,6 @@ class CollectionBase { /// Get the collection buffers for this collection virtual podio::CollectionWriteBuffers getBuffers() = 0; - /// Create (empty) collection buffers from which a collection can be constructed - virtual podio::CollectionReadBuffers createBuffers() /*const*/ = 0; - - /// Create (empty) collection buffers from which a collection can be constructed - /// Versioned to support schema evolution - virtual podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, - podio::Backend backend) /*const*/ - 
= 0; - /// check for validity of the container after read virtual bool isValid() const = 0; diff --git a/include/podio/CollectionBufferFactory.h b/include/podio/CollectionBufferFactory.h new file mode 100644 index 000000000..c3c45692b --- /dev/null +++ b/include/podio/CollectionBufferFactory.h @@ -0,0 +1,84 @@ +#ifndef PODIO_COLLECTIONBUFFERFACTORY_H +#define PODIO_COLLECTIONBUFFERFACTORY_H + +#include "podio/CollectionBuffers.h" +#include "podio/SchemaEvolution.h" + +#include +#include +#include +#include + +namespace podio { + +/** + * The CollectionBufferFactory allows to create buffers of known datatypes, + * which can then be populated by e.g. readers. In order to support schema + * evolution, the buffers have a version and this factory will also require a + * schema version to create buffers. + * + * It is implemented as a singleton, which is populated at the time a shared + * datamodel library is loaded. It is assumed that that happens early on in the + * startup of an application, such that only a single thread will access the + * factory instance for registering datatypes. Since the necessary creation + * functions are part of the core datamodel library, this should be very easy to + * achieve by simply linking to that library. Once the factory is populated it + * can be safely accessed from multiple threads concurrently to obtain buffers. + */ +class CollectionBufferFactory { + /// Internal storage is a map to an array of creation functions, where the + /// version determines the place in that array.
This should be a viable + /// approach because we know the "latest and greatest" schema version + using CreationFuncT = std::function; + using VersionMapT = std::vector; + using MapT = std::unordered_map; + +public: + /// The buffer factory is a singleton so we disable all copy and move + /// constructors explicitly + CollectionBufferFactory(CollectionBufferFactory const&) = delete; + CollectionBufferFactory& operator=(CollectionBufferFactory const&) = delete; + CollectionBufferFactory(CollectionBufferFactory&&) = delete; + CollectionBufferFactory& operator=(CollectionBufferFactory&&) = delete; + ~CollectionBufferFactory() = default; + + /// Mutable instance only used for the initial registration of functions + /// during library loading + static CollectionBufferFactory& mutInstance(); + /// Get the factory instance + static CollectionBufferFactory const& instance(); + + /** + * Create buffers for a given collection type of a given schema version. + * + * @param collType The collection type name (e.g. from collection->getTypeName()) + * @param version The schema version the created buffers should have + * @param subsetColl Should the buffers be for a subset collection or not + * + * @return CollectionReadBuffers if a creation function for this collection + * type has been registered, otherwise an empty optional + */ + std::optional createBuffers(const std::string& collType, SchemaVersionT version, + bool subsetColl) const; + /** + * Register a creation function for a given collection type and schema version. + * + * @param collType The collection type name (i.e. what + * collection->getTypeName() returns) + * @param version The schema version for which this creation function is valid + * @param creationFunc The function that when invoked returns buffers for this + * collection type and schema version. The signature has to be + * podio::CollectionReadBuffers(bool) where the boolean parameter steers + * whether the buffers are for a subset collection or not.
+ */ + void registerCreationFunc(const std::string& collType, SchemaVersionT version, const CreationFuncT& creationFunc); + +private: + CollectionBufferFactory() = default; + + MapT m_funcMap{}; ///< Map to the creation functions +}; + +} // namespace podio + +#endif // PODIO_COLLECTIONBUFFERFACTORY_H diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index 1a2f48a4d..3c493e4ca 100644 --- a/include/podio/ROOTFrameReader.h +++ b/include/podio/ROOTFrameReader.h @@ -25,9 +25,10 @@ class TTree; namespace podio { namespace detail { - // Information about the data vector as wall as the collection class type - // and the index in the collection branches cache vector - using CollectionInfo = std::tuple; + // Information about the collection class type, whether it is a subset, the + // schema version on file and the index in the collection branches cache + // vector + using CollectionInfo = std::tuple; } // namespace detail diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index b6fed99f1..4b52b91c6 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -23,10 +23,10 @@ class TTree; namespace podio { namespace detail { - // Information about the data vector as wall as the collection class type - // and the index in the collection branches cache vector - using CollectionInfo = std::tuple; - + // Information about the collection class type, whether it is a subset, the + // schema version on file and the index in the collection branches cache + // vector + using CollectionInfo = std::tuple; } // namespace detail class EventStore; diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 3e02561b8..5834a9b5b 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -78,6 +78,10 @@ class SIOBlock : public sio::block { return sio::block::name(); } + void setSubsetCollection(bool subsetColl) { + m_subsetColl = subsetColl; + } + void 
setCollection(podio::CollectionBase* col) { m_subsetColl = col->isSubsetCollection(); m_buffers = col->getBuffers(); @@ -85,9 +89,6 @@ class SIOBlock : public sio::block { virtual SIOBlock* create(const std::string& name) const = 0; - // create a new collection for this block - virtual void createBuffers(const bool subsetCollection = false) = 0; - protected: bool m_subsetColl{false}; podio::CollectionReadBuffers m_buffers{}; diff --git a/include/podio/SIOBlockUserData.h b/include/podio/SIOBlockUserData.h index 7ce28cd8e..2c2c5d275 100644 --- a/include/podio/SIOBlockUserData.h +++ b/include/podio/SIOBlockUserData.h @@ -1,6 +1,7 @@ #ifndef PODIO_SIOBLOCKUSERDATA_H #define PODIO_SIOBLOCKUSERDATA_H +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/SIOBlock.h" #include "podio/UserDataCollection.h" @@ -29,15 +30,23 @@ namespace podio { template > class SIOBlockUserData : public podio::SIOBlock { public: - SIOBlockUserData() : SIOBlock(::sio_name(), sio::version::encode_version(0, 1)) { + SIOBlockUserData() : + SIOBlock(::sio_name(), sio::version::encode_version(UserDataCollection::schemaVersion, 0)) { podio::SIOBlockFactory::instance().registerBlockForCollection(podio::userDataTypeName(), this); } - SIOBlockUserData(const std::string& name) : SIOBlock(name, sio::version::encode_version(0, 1)) { + SIOBlockUserData(const std::string& name) : + SIOBlock(name, sio::version::encode_version(UserDataCollection::schemaVersion, 0)) { } - void read(sio::read_device& device, sio::version_type /*version*/) override { + void read(sio::read_device& device, sio::version_type version) override { + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + m_buffers = + bufferFactory + .createBuffers(podio::userDataCollTypeName(), sio::version::major_version(version), false) + .value(); + auto* dataVec = new std::vector(); unsigned size(0); device.data(size); @@ -53,17 +62,6 @@ class SIOBlockUserData : public podio::SIOBlock { 
podio::handlePODDataSIO(device, &(*dataVec)[0], size); } - void createBuffers(bool) override { - - m_buffers.references = new podio::CollRefCollection(); - m_buffers.vectorMembers = new podio::VectorMembersInfo(); - - // Nothing to do here since UserDataCollections cannot be subset collections - m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool) { - return std::make_unique>(std::move(*buffers.dataAsVector())); - }; - } - SIOBlock* create(const std::string& name) const override { return new SIOBlockUserData(name); } diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index b3d910ed8..78abda49a 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -4,6 +4,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionBuffers.h" #include "podio/DatamodelRegistry.h" +#include "podio/SchemaEvolution.h" #include "podio/utilities/TypeHelpers.h" #include @@ -16,6 +17,10 @@ template <> \ constexpr const char* userDataTypeName() { \ return #type; \ + } \ + template <> \ + constexpr const char* userDataCollTypeName() { \ + return "podio::UserDataCollection<" #type ">"; \ } namespace podio { @@ -37,6 +42,12 @@ using EnableIfSupportedUserType = std::enable_if_t> constexpr const char* userDataTypeName(); +/** Helper template to provide the fully qualified name of a UserDataCollection. + * Implementations are populated by the PODIO_ADD_USER_TYPE macro. 
+ */ +template > +constexpr const char* userDataCollTypeName(); + PODIO_ADD_USER_TYPE(float) PODIO_ADD_USER_TYPE(double) @@ -79,6 +90,9 @@ class UserDataCollection : public CollectionBase { UserDataCollection& operator=(UserDataCollection&&) = default; ~UserDataCollection() = default; + /// The schema version of UserDataCollections + static constexpr SchemaVersionT schemaVersion = 1; + /// prepare buffers for serialization void prepareForWrite() const override { } @@ -108,22 +122,6 @@ class UserDataCollection : public CollectionBase { return {&_vecPtr, &m_refCollections, &m_vecmem_info}; } - podio::CollectionReadBuffers createBuffers() /*const*/ final { - return {nullptr, nullptr, nullptr, - [](podio::CollectionReadBuffers buffers, bool) { - return std::make_unique>(std::move(*buffers.dataAsVector())); - }, - [](podio::CollectionReadBuffers& buffers) { - buffers.data = podio::CollectionWriteBuffers::asVector(buffers.data); - }}; - } - - podio::CollectionReadBuffers createSchemaEvolvableBuffers(__attribute__((unused)) int readSchemaVersion, - __attribute__((unused)) - podio::Backend backend) /*const*/ final { - return createBuffers(); - } - /// check for validity of the container after read bool isValid() const override { return true; @@ -136,7 +134,7 @@ class UserDataCollection : public CollectionBase { /// fully qualified type name std::string getTypeName() const override { - return std::string("podio::UserDataCollection<") + userDataTypeName() + ">"; + return userDataCollTypeName(); } /// fully qualified type name of elements - with namespace @@ -165,7 +163,7 @@ class UserDataCollection : public CollectionBase { /// The schema version is fixed manually SchemaVersionT getSchemaVersion() const final { - return 1; + return schemaVersion; } /// Print this collection to the passed stream diff --git a/python/podio/generator_utils.py b/python/podio/generator_utils.py index 600d11ca8..17d3a37f3 100644 --- a/python/podio/generator_utils.py +++ 
b/python/podio/generator_utils.py @@ -71,10 +71,9 @@ def _is_fixed_width_type(type_name): class DataType: """Simple class to hold information about a datatype or component that is defined in the datamodel.""" - def __init__(self, klass, schema_version): + def __init__(self, klass): self.full_type = klass self.namespace, self.bare_type = _get_namespace_class(self.full_type) - self.schema_version = schema_version def __str__(self): if self.namespace: diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index 79f43d1b2..b6836f0d3 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -412,10 +412,12 @@ def parse_model(cls, model_dict, package_name, upstream_edm=None): if "schema_version" in model_dict: schema_version = model_dict["schema_version"] + if int(schema_version) <= 0: + raise DefinitionError(f"schema_version has to be larger than 0 (is {schema_version})") else: - warnings.warn("Please provide a schema_version entry. It will become mandatory. Setting it to 0 as default", + warnings.warn("Please provide a schema_version entry. It will become mandatory. 
Setting it to 1 as default", FutureWarning, stacklevel=3) - schema_version = 0 + schema_version = 1 components = {} if "components" in model_dict: diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index ccfaab8ab..773055f10 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -118,7 +118,6 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr self.incfolder = self.datamodel.options['includeSubfolder'] self.expose_pod_members = self.datamodel.options["exposePODMembers"] self.upstream_edm = upstream_edm - self.schema_version = self.datamodel.schema_version self.clang_format = [] self.generated_files = [] @@ -264,7 +263,7 @@ def _process_component(self, name, component): includes.update(component.get("ExtraCode", {}).get("includes", "").split('\n')) component['includes'] = self._sort_includes(includes) - component['class'] = DataType(name, self.schema_version) + component['class'] = DataType(name) self._fill_templates('Component', component) @@ -411,7 +410,7 @@ def _preprocess_datatype(self, name, definition): # Make a copy here and add the preprocessing steps to that such that the # original definition can be left untouched data = deepcopy(definition) - data['class'] = DataType(name, self.schema_version) + data['class'] = DataType(name) data['includes_data'] = self._get_member_includes(definition["Members"]) self._preprocess_for_class(data) self._preprocess_for_obj(data) @@ -426,6 +425,7 @@ def _write_edm_def_file(self): 'package_name': self.package_name, 'edm_definition': model_encoder.encode(self.datamodel), 'incfolder': self.incfolder, + 'schema_version': self.datamodel.schema_version, } self._write_file('DatamodelDefinition.h', @@ -494,9 +494,9 @@ def _needs_include(self, classname) -> IncludeFrom: def _create_selection_xml(self): """Create the selection xml that is necessary for ROOT I/O""" - data = {'components': [DataType(c, self.schema_version) for c in 
self.datamodel.components], - 'datatypes': [DataType(d, self.schema_version) for d in self.datamodel.datatypes], - 'old_schema_components': [DataType(d, self.schema_version) for d in + data = {'components': [DataType(c) for c in self.datamodel.components], + 'datatypes': [DataType(d) for d in self.datamodel.datatypes], + 'old_schema_components': [DataType(d) for d in self.old_datamodels_datatypes | self.old_datamodels_components]} self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index f7a5bcfd8..1789a83f6 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -3,6 +3,8 @@ {% from "macros/iterator.jinja2" import iterator_definitions %} // AUTOMATICALLY GENERATED FILE - DO NOT EDIT +#include "podio/CollectionBufferFactory.h" + #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" @@ -150,18 +152,48 @@ podio::CollectionWriteBuffers {{ collection_type }}::getBuffers() { return m_storage.getCollectionBuffers(m_isSubsetColl); } -podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { - // Very cumbersome way at the moment. We get the actual buffers to have the - // references and vector members sized appropriately (we will use this - // information to create new buffers outside) - auto collBuffers = m_storage.getCollectionBuffers(m_isSubsetColl); +{% for member in Members %} +{{ macros.vectorized_access(class, member) }} +{% endfor %} + +size_t {{ collection_type }}::getDatamodelRegistryIndex() const { + return {{ package_name }}::meta::DatamodelRegistryIndex::value(); +} + +podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { + return {{ package_name }}::meta::schemaVersion; +} + +// anonymous namespace for registration with the CollectionBufferFactory. 
This +// ensures that we don't have to make up arbitrary namespace names here, since +// none of this is publicly visible +namespace { +podio::CollectionReadBuffers createBuffers(bool isSubset) { auto readBuffers = podio::CollectionReadBuffers{}; - readBuffers.references = collBuffers.references; - readBuffers.vectorMembers = collBuffers.vectorMembers; + readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; + + // The number of ObjectID vectors is either 1 or the sum of OneToMany and + // OneToOne relations + const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; + readBuffers.references = new podio::CollRefCollection(nRefs); + for (auto& ref : *readBuffers.references) { + // Make sure to place usable buffer pointers here + ref = std::make_unique>(); + } + + readBuffers.vectorMembers = new podio::VectorMembersInfo(); + if (!isSubset) { + readBuffers.vectorMembers->reserve({{ VectorMembers | length }}); +{% for member in VectorMembers %} + readBuffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>); +{% endfor %} + } + readBuffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { - {{ collection_type }}Data data(buffers, isSubsetColl); - return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); + {{ collection_type }}Data data(buffers, isSubsetColl); + return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); }; + readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { if (buffers.data) { buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); @@ -172,26 +204,24 @@ podio::CollectionReadBuffers {{ collection_type }}::createBuffers() /*const*/ { {% endfor %} {% endif %} }; + return readBuffers; } -podio::CollectionReadBuffers {{ collection_type }}::createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend /*backend*/) 
/*const*/ { - // no version difference -> no-op - if (readSchemaVersion == {{ class.schema_version }}) { - return createBuffers(); - } - // default is no-op as well - return createBuffers(); +// The usual trick with an IIFE and a static variable inside a funtion and then +// making sure to call that function during shared library loading +bool registerCollection() { + const static auto reg = []() { + auto& factory = podio::CollectionBufferFactory::mutInstance(); + factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + return true; + }(); + return reg; } +const auto registeredCollection = registerCollection(); +} // namespace -{% for member in Members %} -{{ macros.vectorized_access(class, member) }} -{% endfor %} - -size_t {{ collection_type }}::getDatamodelRegistryIndex() const { - return {{ package_name }}::meta::DatamodelRegistryIndex::value(); -} #ifdef PODIO_JSON_OUTPUT void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 6c6939456..f91c02db5 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -84,7 +84,7 @@ public: /// fully qualified type name of stored POD elements - with namespace std::string getDataTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Data" }}"); } /// schema version - unsigned int getSchemaVersion() const final { return {{ class.schema_version }}; }; + podio::SchemaVersionT getSchemaVersion() const final; bool isSubsetCollection() const final { return m_isSubsetColl; @@ -112,14 +112,6 @@ public: /// Get the collection buffers for this collection podio::CollectionWriteBuffers getBuffers() final; - /// Create (empty) collection buffers from which a collection can be constructed - podio::CollectionReadBuffers createBuffers() /*const*/ final; - - /// Create (empty) collection buffers from 
which a collection can be constructed - /// Versioned to support schema evolution - podio::CollectionReadBuffers createSchemaEvolvableBuffers(int readSchemaVersion, podio::Backend backend) /*const*/ final; - - void setID(unsigned ID) final { m_collectionID = ID; if (!m_isSubsetColl) { diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index 17a300cb9..c424ded7f 100644 --- a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -1,6 +1,7 @@ // AUTOMATICALLY GENERATED FILE - DO NOT EDIT #include "podio/DatamodelRegistry.h" +#include "podio/SchemaEvolution.h" namespace {{ package_name }}::meta { /** @@ -27,4 +28,9 @@ private: size_t m_value{podio::DatamodelRegistry::NoDefinitionAvailable}; }; +/** + * The schema version at generation time + */ +static constexpr podio::SchemaVersionT schemaVersion = {{ schema_version }}; + } // namespace {{ package_name }}::meta diff --git a/python/templates/SIOBlock.cc.jinja2 b/python/templates/SIOBlock.cc.jinja2 index a33febb8c..f8090ea75 100644 --- a/python/templates/SIOBlock.cc.jinja2 +++ b/python/templates/SIOBlock.cc.jinja2 @@ -6,6 +6,7 @@ #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "podio/CollectionBuffers.h" +#include "podio/CollectionBufferFactory.h" #include #include @@ -14,22 +15,19 @@ {{ utils.namespace_open(class.namespace) }} {% with block_class = class.bare_type + 'SIOBlock' %} -void {{ block_class }}::read(sio::read_device& device, sio::version_type) { - if (m_subsetColl) { - m_buffers.references->emplace_back(std::make_unique>()); - } else { -{% for relation in OneToManyRelations + OneToOneRelations %} - m_buffers.references->emplace_back(std::make_unique>()); -{% endfor %} - } +void {{ block_class }}::read(sio::read_device& device, sio::version_type version) { + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + // TODO: + // - Error handling of empty optional + auto 
maybeBuffers = bufferFactory.createBuffers("{{ class.full_type }}Collection", sio::version::major_version(version), m_subsetColl); + m_buffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (not m_subsetColl) { unsigned size(0); device.data( size ); - m_buffers.data = new std::vector<{{ class.full_type }}Data>(size); auto* dataVec = m_buffers.dataAsVector<{{ class.full_type }}Data>(); + dataVec->resize(size); podio::handlePODDataSIO(device, dataVec->data(), size); - // m_buffers.data = dataVec; } //---- read ref collections ----- @@ -84,18 +82,12 @@ void {{ block_class }}::write(sio::write_device& device) { {% endif %} } -void {{ block_class }}::createBuffers(bool subsetColl) { - m_subsetColl = subsetColl; - - - - m_buffers.references = new podio::CollRefCollection(); - m_buffers.vectorMembers = new podio::VectorMembersInfo(); - - m_buffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { - {{ class.bare_type }}CollectionData data(buffers, isSubsetColl); - return std::make_unique<{{ class.bare_type }}Collection>(std::move(data), isSubsetColl); - }; +namespace { + // Create one instance of the type in order to ensure that the SioBlock + // library actually needs linking to the core library. Otherwise it is + // possible that the registry is not populated when the SioBlock library is + // loaded, e.g. when using the python bindings. 
+ const auto elem = {{ class.full_type }}{}; } {% endwith %} diff --git a/python/templates/SIOBlock.h.jinja2 b/python/templates/SIOBlock.h.jinja2 index 62d210852..c25eaebc9 100644 --- a/python/templates/SIOBlock.h.jinja2 +++ b/python/templates/SIOBlock.h.jinja2 @@ -4,6 +4,8 @@ #ifndef {{ package_name.upper() }}_{{ class.bare_type }}SIOBlock_H #define {{ package_name.upper() }}_{{ class.bare_type }}SIOBlock_H +#include "{{ incfolder }}DatamodelDefinition.h" + #include "podio/SIOBlock.h" #include @@ -23,13 +25,12 @@ namespace podio { class {{ block_class }}: public podio::SIOBlock { public: {{ block_class }}() : - SIOBlock("{{ class.bare_type }}", sio::version::encode_version(0, 1)) { + SIOBlock("{{ class.bare_type }}", sio::version::encode_version({{ package_name }}::meta::schemaVersion, 0)) { podio::SIOBlockFactory::instance().registerBlockForCollection("{{class.full_type}}", this); } {{ block_class }}(const std::string& name) : - // SIOBlock(name + "__{{ class.bare_type }}", sio::version::encode_version(0, 1)) {} - SIOBlock(name, sio::version::encode_version(0, 1)) {} + SIOBlock(name, sio::version::encode_version({{ package_name }}::meta::schemaVersion, 0)) {} // Read the collection data from the device void read(sio::read_device& device, sio::version_type version) override; @@ -37,12 +38,7 @@ public: // Write the collection data to the device void write(sio::write_device& device) override; - void createBuffers(bool isSubsetColl) override; - SIOBlock* create(const std::string& name) const override { return new {{ block_class }}(name); } - -private: - podio::CollectionReadBuffers createBuffers() const; }; static {{ block_class }} _dummy{{ block_class }}; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ec7d4cfce..ae5f0f984 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -51,6 +51,8 @@ SET(core_sources EventStore.cc DatamodelRegistry.cc DatamodelRegistryIOHelpers.cc + UserDataCollection.cc + CollectionBufferFactory.cc ) SET(core_headers diff 
--git a/src/CollectionBufferFactory.cc b/src/CollectionBufferFactory.cc new file mode 100644 index 000000000..8c48ea083 --- /dev/null +++ b/src/CollectionBufferFactory.cc @@ -0,0 +1,59 @@ +#include "podio/CollectionBufferFactory.h" +#include "podio/CollectionBuffers.h" + +namespace podio { +CollectionBufferFactory& CollectionBufferFactory::mutInstance() { + static CollectionBufferFactory factory; + return factory; +} + +CollectionBufferFactory const& CollectionBufferFactory::instance() { + return mutInstance(); +} + +std::optional +CollectionBufferFactory::createBuffers(const std::string& collType, SchemaVersionT version, bool subsetColl) const { + if (const auto typeIt = m_funcMap.find(collType); typeIt != m_funcMap.end()) { + const auto& [_, versionMap] = *typeIt; + if (versionMap.size() >= version) { + return versionMap[version - 1](subsetColl); + } + } + + return std::nullopt; +} + +void CollectionBufferFactory::registerCreationFunc(const std::string& collType, SchemaVersionT version, + const CreationFuncT& creationFunc) { + // Check if we have an entry already to which we can add information + auto typeIt = m_funcMap.find(collType); + if (typeIt != m_funcMap.end()) { + auto& versionMap = typeIt->second; + // If we already have something for this type, make sure to handle all + // versions correctly, assuming that all present creation functions are + // unchanged and that all non-present creation functions behave the same as + // this (assumed latest) version + const auto prevSize = versionMap.size(); + if (prevSize < version) { + versionMap.resize(version); + for (auto i = prevSize; i < version; ++i) { + versionMap[i] = creationFunc; + } + } else { + // In this case we are explicitly updating one specific version + versionMap[version - 1] = creationFunc; + } + } else { + // If we have a completely new map, than we simply populate all versions + // with this creation function + VersionMapT versionMap; + versionMap.reserve(version); + for (size_t i = 0; i < 
version; ++i) { + versionMap.emplace_back(creationFunc); + } + + m_funcMap.emplace(collType, std::move(versionMap)); + } +} + +} // namespace podio diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index f4ba00bc4..d150789df 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -1,5 +1,6 @@ #include "podio/ROOTFrameReader.h" #include "podio/CollectionBase.h" +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" #include "podio/GenericParameters.h" @@ -66,40 +67,14 @@ std::unique_ptr ROOTFrameReader::readEntry(ROOTFrameReader::Categ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, size_t iColl) { const auto& name = catInfo.storedClasses[iColl].first; - const auto& [theClass, collectionClass, index] = catInfo.storedClasses[iColl].second; + const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second; auto& branches = catInfo.branches[index]; - // Create empty collection buffers, and connect them to the right branches - auto collBuffers = podio::CollectionReadBuffers(); - // If we have a valid data buffer class we know that have to read data, - // otherwise we are handling a subset collection - const bool isSubsetColl = theClass == nullptr; - if (!isSubsetColl) { - collBuffers.data = theClass->New(); - } - - { - auto collection = - std::unique_ptr(static_cast(collectionClass->New())); - collection->setSubsetCollection(isSubsetColl); - - auto tmpBuffers = collection->createBuffers(); - collBuffers.createCollection = std::move(tmpBuffers.createCollection); - collBuffers.recast = std::move(tmpBuffers.recast); + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + auto maybeBuffers = bufferFactory.createBuffers(collType, schemaVersion, isSubsetColl); - if (auto* refs = tmpBuffers.references) { - collBuffers.references = new podio::CollRefCollection(refs->size()); - } - if 
(auto* vminfo = tmpBuffers.vectorMembers) { - collBuffers.vectorMembers = new podio::VectorMembersInfo(); - collBuffers.vectorMembers->reserve(vminfo->size()); - - for (const auto& [type, _] : (*vminfo)) { - const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); - collBuffers.vectorMembers->emplace_back(type, vecClass->New()); - } - } - } + // TODO: Error handling of empty optional + auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); // After switching trees in the chain, branch pointers get invalidated so @@ -295,10 +270,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); } - const std::string bufferClassName = "std::vector<" + collection->getDataTypeName() + ">"; - const auto bufferClass = isSubsetColl ? nullptr : TClass::GetClass(bufferClassName.c_str()); - - storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); + storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); collBranches.push_back(branches); } diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index 0a9380bd1..0ce48bf77 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -1,3 +1,4 @@ +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/ROOTFrameData.h" #include "rootUtils.h" @@ -49,40 +50,14 @@ std::unique_ptr ROOTLegacyReader::readEntry() { podio::CollectionReadBuffers ROOTLegacyReader::getCollectionBuffers(const std::pair& collInfo) { const auto& name = collInfo.first; - const auto& [theClass, collectionClass, index] = collInfo.second; + const auto& [collType, isSubsetColl, schemaVersion, index] = collInfo.second; auto& branches = m_collectionBranches[index]; - // Create empty collection buffers, and connect them to 
the right branches - auto collBuffers = podio::CollectionReadBuffers(); - // If we have a valid data buffer class we know that have to read data, - // otherwise we are handling a subset collection - const bool isSubsetColl = theClass == nullptr; - if (!isSubsetColl) { - collBuffers.data = theClass->New(); - } - - { - auto collection = - std::unique_ptr(static_cast(collectionClass->New())); - collection->setSubsetCollection(isSubsetColl); - - auto tmpBuffers = collection->createBuffers(); - collBuffers.createCollection = std::move(tmpBuffers.createCollection); - collBuffers.recast = std::move(tmpBuffers.recast); + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + auto maybeBuffers = bufferFactory.createBuffers(collType, schemaVersion, isSubsetColl); - if (auto* refs = tmpBuffers.references) { - collBuffers.references = new podio::CollRefCollection(refs->size()); - } - if (auto* vminfo = tmpBuffers.vectorMembers) { - collBuffers.vectorMembers = new podio::VectorMembersInfo(); - collBuffers.vectorMembers->reserve(vminfo->size()); - - for (const auto& [type, _] : (*vminfo)) { - const auto* vecClass = TClass::GetClass(("vector<" + type + ">").c_str()); - collBuffers.vectorMembers->emplace_back(type, vecClass->New()); - } - } - } + // TODO: Error handling of empty optional + auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); const auto localEntry = m_chain->LoadTree(m_eventNumber); // After switching trees in the chain, branch pointers get invalidated so @@ -216,10 +191,8 @@ void ROOTLegacyReader::createCollectionBranches(const std::vectorgetDataTypeName() + ">"; - const auto bufferClass = isSubsetColl ? 
nullptr : TClass::GetClass(bufferClassName.c_str()); + m_storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); - m_storedClasses.emplace_back(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); m_collectionBranches.push_back(branches); } } diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index c0a514e6a..3c1663536 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -100,7 +100,7 @@ std::shared_ptr SIOBlockFactory::createBlock(const std::string& typeSt if (it != _map.end()) { auto blk = std::shared_ptr(it->second->create(name)); - blk->createBuffers(isSubsetColl); + blk->setSubsetCollection(isSubsetColl); return blk; } else { return nullptr; diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc new file mode 100644 index 000000000..71ea34d48 --- /dev/null +++ b/src/UserDataCollection.cc @@ -0,0 +1,54 @@ +#include "podio/UserDataCollection.h" +#include "podio/CollectionBufferFactory.h" +#include "podio/CollectionBuffers.h" + +#include +#include + +namespace podio { + +namespace { + /** + * Helper function to register a UserDataCollection to the + * CollectionBufferFactory. Takes the BasicType as template argument. 
+ * + * Returns an integer so that it can be used with std::apply + */ + template + int registerUserDataCollection(T) { + // Register with schema version 1 to allow for potential changes + CollectionBufferFactory::mutInstance().registerCreationFunc( + userDataCollTypeName(), UserDataCollection::schemaVersion, [](bool) { + return podio::CollectionReadBuffers{new std::vector(), nullptr, nullptr, + [](podio::CollectionReadBuffers buffers, bool) { + return std::make_unique>( + std::move(*buffers.dataAsVector())); + }, + [](podio::CollectionReadBuffers& buffers) { + buffers.data = podio::CollectionWriteBuffers::asVector(buffers.data); + }}; + }); + + return 1; + } + + /** + * Helper function to loop over all types in the SupportedUserDataTypes to + * register the UserDataCollection types. + */ + bool registerUserDataCollections() { + // Use an IILE here to make sure to do the call exactly once + const static auto reg = []() { + std::apply([](auto... x) { std::make_tuple(registerUserDataCollection(x)...); }, SupportedUserDataTypes{}); + return true; + }(); + return reg; + } + + /** + * Invoke the registration function for user data collections at least once + */ + const auto registeredUserData = registerUserDataCollections(); +} // namespace + +} // namespace podio diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 2bcf7cb11..369d39b58 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,5 +1,5 @@ --- -schema_version : 1 +schema_version : 2 options : # should getters / setters be prefixed with get / set? diff --git a/tests/datalayout_old.yaml b/tests/datalayout_old.yaml index 81a9d5707..eea733ffa 100755 --- a/tests/datalayout_old.yaml +++ b/tests/datalayout_old.yaml @@ -1,5 +1,5 @@ --- -schema_version : 0 +schema_version : 1 options : # should getters / setters be prefixed with get / set? 
@@ -200,4 +200,4 @@ datatypes : Description: "Datatype with user defined initialization values" Author: "Thomas Madlener" Members: - - int x // some member \ No newline at end of file + - int x // some member diff --git a/tests/schema_evolution.yaml b/tests/schema_evolution.yaml index 8a9e925f8..561f36fbe 100644 --- a/tests/schema_evolution.yaml +++ b/tests/schema_evolution.yaml @@ -1,6 +1,6 @@ --- -from_schema_version : 0 -to_schema_version : 1 +from_schema_version : 1 +to_schema_version : 2 evolutions: From 509dd1a7220941aaf32eb356e06824a3fef9f204 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 9 May 2023 16:01:29 +0200 Subject: [PATCH 059/100] Fix dumping of UserDataCollection (#414) * Fix dumping of UserDataCollection * Add unittest to cover fix --- include/podio/UserDataCollection.h | 2 +- tests/unittest.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 78abda49a..4fe575996 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -171,7 +171,7 @@ class UserDataCollection : public CollectionBase { os << "["; if (!_vec.empty()) { os << _vec[0]; - for (size_t i = 0; i < _vec.size(); ++i) { + for (size_t i = 1; i < _vec.size(); ++i) { os << ", " << _vec[i]; } } diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 81090e145..77e14727f 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -1,6 +1,7 @@ // STL #include #include +#include #include #include #include @@ -331,6 +332,18 @@ TEST_CASE("thread-safe prepareForWrite", "[basics][multithread]") { } } +TEST_CASE("UserDataCollection print", "[basics]") { + auto coll = podio::UserDataCollection(); + coll.push_back(1); + coll.push_back(2); + coll.push_back(3); + + std::stringstream sstr; + coll.print(sstr); + + REQUIRE(sstr.str() == "[1, 2, 3]"); +} + /* TEST_CASE("Arrays") { auto obj = ExampleWithArray(); @@ -1115,4 +1128,5 @@ TEST_CASE("JSON", 
"[json]") { REQUIRE(json["userData"][j] == 3.14f * j); } } + #endif From b3b06aaa57a013326917a541e9c306c877934bcf Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Mon, 15 May 2023 11:20:53 +0200 Subject: [PATCH 060/100] Delete square when running cmake (#410) --- python/CMakeLists.txt | 1 - python/figure.txt | 44 --------------------------------- python/podio_class_generator.py | 24 +++--------------- 3 files changed, 4 insertions(+), 65 deletions(-) delete mode 100755 python/figure.txt diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 43b706390..6f4fd2657 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -5,7 +5,6 @@ SET(podio_PYTHON_DIR ${CMAKE_CURRENT_LIST_DIR} PARENT_SCOPE) set(to_install podio_class_generator.py podio_schema_evolution.py - figure.txt EventStore.py) install(FILES ${to_install} DESTINATION ${podio_PYTHON_INSTALLDIR}) diff --git a/python/figure.txt b/python/figure.txt deleted file mode 100755 index 3de111686..000000000 --- a/python/figure.txt +++ /dev/null @@ -1,44 +0,0 @@ -(lp0 -S'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p1 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m 
\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p2 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p3 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m 
\x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p4 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p5 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p6 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m 
\x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p7 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p8 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p9 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m 
\x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p10 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p11 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m 
\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p12 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p13 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p14 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m 
\x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p15 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p16 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;9m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p17 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m 
\x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;202m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p18 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;11m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p19 -aS'\x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[48;5;228m \x1b[0m' -p20 -aS'\x1b[0m' -p21 -a. 
\ No newline at end of file diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 773055f10..0b37d706c 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -5,15 +5,12 @@ import os import sys import subprocess -import pickle from copy import deepcopy from enum import IntEnum from collections.abc import Mapping from collections import defaultdict -from itertools import zip_longest - import jinja2 from podio.podio_config_reader import PodioConfigReader @@ -26,15 +23,8 @@ REPORT_TEXT = """ PODIO Data Model ================ - Used - {yamlfile} - to create - {nclasses} classes - in - {installdir}/ - Read instructions in - the README.md to run - your first example! + Used {yamlfile} to create {nclasses} classes in {installdir}/ + Read instructions in the README.md to run your first example! """ @@ -171,19 +161,13 @@ def print_report(self): if not self.verbose: return - with open(os.path.join(THIS_DIR, "figure.txt"), 'rb') as pkl: - figure = pickle.load(pkl) - nclasses = 5 * len(self.datamodel.datatypes) + len(self.datamodel.components) text = REPORT_TEXT.format(yamlfile=self.yamlfile, nclasses=nclasses, installdir=self.install_dir) - print() - for figline, summaryline in zip_longest(figure, text.splitlines(), fillvalue=''): - print(figline + summaryline) - print(" 'Homage to the Square' - Josef Albers") - print() + for summaryline in text.splitlines(): + print(summaryline) print() def _eval_template(self, template, data): From e13b3e68bdfcf5b8a121a5cc36bf809949bbb6fb Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Mon, 15 May 2023 17:10:38 +0200 Subject: [PATCH 061/100] Remove the deprecated getters and setters from the generic parameters (#415) * Remove the deprecated getters and setters from the generic parameters * Add some friends not to make public the getMap functions --- include/podio/GenericParameters.h | 116 ++---------------------------- 
src/GenericParameters.cc | 78 -------------------- src/SIOBlock.cc | 16 ++--- 3 files changed, 15 insertions(+), 195 deletions(-) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 965bee334..773c7e9ba 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -12,7 +12,11 @@ #include #include -#define DEPRECATED_ACCESS [[deprecated("Use templated access functionality")]] +namespace sio { +class read_device; +class write_device; +using version_type = uint32_t; // from sio/definitions +} // namespace sio namespace podio { @@ -55,12 +59,6 @@ namespace detail { template using GenericDataReturnType = typename detail::GenericDataReturnTypeHelper::type; -// These should be trivial to remove once the deprecated non-templated access -// functionality is actually removed -typedef std::vector IntVec; -typedef std::vector FloatVec; -typedef std::vector StringVec; - /** GenericParameters objects allow to store generic named parameters of type * int, float and string or vectors of these types. * They can be used to store (user) meta data that is @@ -132,69 +130,6 @@ class GenericParameters { template > std::vector getKeys() const; - /** Returns the first integer value for the given key. - */ - DEPRECATED_ACCESS int getIntVal(const std::string& key) const; - - /** Returns the first float value for the given key. - */ - DEPRECATED_ACCESS float getFloatVal(const std::string& key) const; - - /** Returns the first string value for the given key. - */ - DEPRECATED_ACCESS const std::string& getStringVal(const std::string& key) const; - - /** Adds all integer values for the given key to values. - * Returns a reference to values for convenience. - */ - DEPRECATED_ACCESS IntVec& getIntVals(const std::string& key, IntVec& values) const; - - /** Adds all float values for the given key to values. - * Returns a reference to values for convenience. 
- */ - DEPRECATED_ACCESS FloatVec& getFloatVals(const std::string& key, FloatVec& values) const; - - /** Adds all float values for the given key to values. - * Returns a reference to values for convenience. - */ - DEPRECATED_ACCESS StringVec& getStringVals(const std::string& key, StringVec& values) const; - - /** Returns a list of all keys of integer parameters. - */ - DEPRECATED_ACCESS const StringVec& getIntKeys(StringVec& keys) const; - - /** Returns a list of all keys of float parameters. - */ - DEPRECATED_ACCESS const StringVec& getFloatKeys(StringVec& keys) const; - - /** Returns a list of all keys of string parameters. - */ - DEPRECATED_ACCESS const StringVec& getStringKeys(StringVec& keys) const; - - /** The number of integer values stored for this key. - */ - DEPRECATED_ACCESS int getNInt(const std::string& key) const; - - /** The number of float values stored for this key. - */ - DEPRECATED_ACCESS int getNFloat(const std::string& key) const; - - /** The number of string values stored for this key. - */ - DEPRECATED_ACCESS int getNString(const std::string& key) const; - - /** Set integer values for the given key. - */ - DEPRECATED_ACCESS void setValues(const std::string& key, const IntVec& values); - - /** Set float values for the given key. - */ - DEPRECATED_ACCESS void setValues(const std::string& key, const FloatVec& values); - - /** Set string values for the given key. 
- */ - DEPRECATED_ACCESS void setValues(const std::string& key, const StringVec& values); - /// erase all elements void clear() { _intMap.clear(); @@ -209,45 +144,8 @@ class GenericParameters { return _intMap.empty() && _floatMap.empty() && _stringMap.empty(); } - /** - * Get the internal int map (necessary for serialization with SIO) - */ - const IntMap& getIntMap() const { - return getMap(); - } - IntMap& getIntMap() { - return getMap(); - } - - /** - * Get the internal float map (necessary for serialization with SIO) - */ - const FloatMap& getFloatMap() const { - return getMap(); - } - FloatMap& getFloatMap() { - return getMap(); - } - - /** - * Get the internal double map (necessary for serialization with SIO) - */ - const DoubleMap& getDoubleMap() const { - return getMap(); - } - DoubleMap& getDoubleMap() { - return getMap(); - } - - /** - * Get the internal string map (necessary for serialization with SIO) - */ - const StringMap& getStringMap() const { - return getMap(); - } - StringMap& getStringMap() { - return getMap(); - } + friend void writeGenericParameters(sio::write_device& device, const GenericParameters& parameters); + friend void readGenericParameters(sio::read_device& device, GenericParameters& parameters, sio::version_type version); private: /// Get a reference to the internal map for a given type (necessary for SIO) diff --git a/src/GenericParameters.cc b/src/GenericParameters.cc index 1307d5335..758fefa74 100644 --- a/src/GenericParameters.cc +++ b/src/GenericParameters.cc @@ -32,84 +32,6 @@ GenericParameters::GenericParameters(const GenericParameters& other) : } } -int GenericParameters::getIntVal(const std::string& key) const { - return getValue(key); -} - -float GenericParameters::getFloatVal(const std::string& key) const { - return getValue(key); -} - -const std::string& GenericParameters::getStringVal(const std::string& key) const { - return getValue(key); -} - -IntVec& GenericParameters::getIntVals(const std::string& key, IntVec& values) 
const { - for (const auto v : getValue>(key)) { - values.push_back(v); - } - return values; -} - -FloatVec& GenericParameters::getFloatVals(const std::string& key, FloatVec& values) const { - for (const auto v : getValue>(key)) { - values.push_back(v); - } - return values; -} - -StringVec& GenericParameters::getStringVals(const std::string& key, StringVec& values) const { - for (const auto& v : getValue>(key)) { - values.push_back(v); - } - return values; -} - -const StringVec& GenericParameters::getIntKeys(StringVec& keys) const { - for (const auto& k : getKeys()) { - keys.push_back(k); - } - return keys; -} - -const StringVec& GenericParameters::getFloatKeys(StringVec& keys) const { - for (const auto& k : getKeys()) { - keys.push_back(k); - } - return keys; -} - -const StringVec& GenericParameters::getStringKeys(StringVec& keys) const { - for (const auto& k : getKeys()) { - keys.push_back(k); - } - return keys; -} - -int GenericParameters::getNInt(const std::string& key) const { - return getN(key); -} - -int GenericParameters::getNFloat(const std::string& key) const { - return getN(key); -} - -int GenericParameters::getNString(const std::string& key) const { - return getN(key); -} - -void GenericParameters::setValues(const std::string& key, const IntVec& values) { - setValue(key, values); -} - -void GenericParameters::setValues(const std::string& key, const FloatVec& values) { - setValue(key, values); -} - -void GenericParameters::setValues(const std::string& key, const StringVec& values) { - setValue(key, values); -} - template std::ostream& operator<<(std::ostream& os, const std::vector& values) { os << "["; diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 3c1663536..981f7df47 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -50,18 +50,18 @@ void SIOCollectionIDTableBlock::write(sio::write_device& device) { } void writeGenericParameters(sio::write_device& device, const GenericParameters& params) { - writeMapLike(device, params.getIntMap()); - 
writeMapLike(device, params.getFloatMap()); - writeMapLike(device, params.getStringMap()); - writeMapLike(device, params.getDoubleMap()); + writeMapLike(device, params.getMap()); + writeMapLike(device, params.getMap()); + writeMapLike(device, params.getMap()); + writeMapLike(device, params.getMap()); } void readGenericParameters(sio::read_device& device, GenericParameters& params, sio::version_type version) { - readMapLike(device, params.getIntMap()); - readMapLike(device, params.getFloatMap()); - readMapLike(device, params.getStringMap()); + readMapLike(device, params.getMap()); + readMapLike(device, params.getMap()); + readMapLike(device, params.getMap()); if (version >= sio::version::encode_version(0, 2)) { - readMapLike(device, params.getDoubleMap()); + readMapLike(device, params.getMap()); } } From a2890154aebeacaec86b7cd07fd13973b84d6724 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Fri, 19 May 2023 09:28:26 +0200 Subject: [PATCH 062/100] Remove selection rules for these classes that don't exist anymore (#416) Co-authored-by: jmcarcell --- src/selection.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/selection.xml b/src/selection.xml index 92ca68764..b1a9694c1 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -6,9 +6,6 @@ - - - From 7596120cbc5d1ee9ea4fe385ed96a2101b568ed3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 22 May 2023 03:31:57 -0400 Subject: [PATCH 063/100] Revive public `getMap` functionality for `GenericParameters` (#418) * Make string/float/int maps available again from GenericParameters * Remove unnecessary typedefs --- include/podio/GenericParameters.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 773c7e9ba..311b622b5 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -18,6 +18,9 @@ 
class write_device; using version_type = uint32_t; // from sio/definitions } // namespace sio +#define DEPR_NON_TEMPLATE \ + [[deprecated("Non-templated access will be removed. Switch to templated access functionality")]] + namespace podio { /// The types which are supported in the GenericParameters @@ -75,10 +78,6 @@ class GenericParameters { using MapType = std::map>; private: - using IntMap = MapType; - using FloatMap = MapType; - using DoubleMap = MapType; - using StringMap = MapType; // need mutex pointers for having the possibility to copy/move GenericParameters using MutexPtr = std::unique_ptr; @@ -147,8 +146,7 @@ class GenericParameters { friend void writeGenericParameters(sio::write_device& device, const GenericParameters& parameters); friend void readGenericParameters(sio::read_device& device, GenericParameters& parameters, sio::version_type version); -private: - /// Get a reference to the internal map for a given type (necessary for SIO) + /// Get a reference to the internal map for a given type template const MapType>& getMap() const { if constexpr (std::is_same_v, int>) { @@ -162,6 +160,19 @@ class GenericParameters { } } + DEPR_NON_TEMPLATE const auto& getStringMap() const { + return getMap(); + } + + DEPR_NON_TEMPLATE const auto& getFloatMap() const { + return getMap(); + } + + DEPR_NON_TEMPLATE const auto& getIntMap() const { + return getMap(); + } + +private: /// Get a reference to the internal map for a given type (necessary for SIO) template MapType>& getMap() { @@ -191,13 +202,13 @@ class GenericParameters { } private: - IntMap _intMap{}; ///< The map storing the integer values + MapType _intMap{}; ///< The map storing the integer values mutable MutexPtr m_intMtx{nullptr}; ///< The mutex guarding the integer map - FloatMap _floatMap{}; ///< The map storing the float values + MapType _floatMap{}; ///< The map storing the float values mutable MutexPtr m_floatMtx{nullptr}; ///< The mutex guarding the float map - StringMap _stringMap{}; ///< The map 
storing the string values + MapType _stringMap{}; ///< The map storing the string values mutable MutexPtr m_stringMtx{nullptr}; ///< The mutex guarding the string map - DoubleMap _doubleMap{}; ///< The map storing the double values + MapType _doubleMap{}; ///< The map storing the double values mutable MutexPtr m_doubleMtx{nullptr}; ///< The mutex guarding the double map }; From fec860991c8daf4667e02923297d37964ecbe9ec Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 14:29:56 +0200 Subject: [PATCH 064/100] Fix reading of multiple files with ROOTFrameReader (#417) * Add documentation for opening of files API * Fix seg fault when reading from multiple files * Add tests and properly fix file switching * Ignore new test in sanitizer runs --- include/podio/ROOTFrameReader.h | 27 ++++++++- src/ROOTFrameReader.cc | 36 ++++++++---- tests/CMakeLists.txt | 5 +- tests/CTestCustom.cmake | 1 + tests/read_frame_root_multiple.cpp | 89 ++++++++++++++++++++++++++++++ 5 files changed, 143 insertions(+), 15 deletions(-) create mode 100644 tests/read_frame_root_multiple.cpp diff --git a/include/podio/ROOTFrameReader.h b/include/podio/ROOTFrameReader.h index 3c493e4ca..c152c3772 100644 --- a/include/podio/ROOTFrameReader.h +++ b/include/podio/ROOTFrameReader.h @@ -52,8 +52,27 @@ class ROOTFrameReader { ROOTFrameReader(const ROOTFrameReader&) = delete; ROOTFrameReader& operator=(const ROOTFrameReader&) = delete; + /** + * Open a single file for reading. + * + * @param filename The name of the input file + */ void openFile(const std::string& filename); + /** + * Open multiple files for reading and then treat them as if they are one file + * + * NOTE: All of the files are assumed to have the same structure. 
Specifically + * this means: + * - The same categories are available from all files + * - The collections that are contained in the individual categories are the + * same across all files + * + * This usually boils down to "the files have been written with the same + * settings", e.g. they are outputs of a batched process. + * + * @param filenames The filenames of all input files that should be read + */ void openFiles(const std::vector& filenames); /** @@ -125,7 +144,10 @@ class ROOTFrameReader { */ CategoryInfo& getCategoryInfo(const std::string& name); - GenericParameters readEventMetaData(CategoryInfo& catInfo); + /** + * Read the parameters for the entry specified in the passed CategoryInfo + */ + GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry); /** * Read the data entry specified in the passed CategoryInfo, and increase the @@ -137,7 +159,8 @@ class ROOTFrameReader { /** * Get / read the buffers at index iColl in the passed category information */ - podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl); + podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches, + unsigned int localEntry); std::unique_ptr m_metaChain{nullptr}; ///< The metadata tree std::unordered_map m_categories{}; ///< All categories diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index d150789df..85c67b116 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -22,15 +22,22 @@ std::tuple, std::vector& collInfo); -GenericParameters ROOTFrameReader::readEventMetaData(ROOTFrameReader::CategoryInfo& catInfo) { +GenericParameters ROOTFrameReader::readEntryParameters(ROOTFrameReader::CategoryInfo& catInfo, bool reloadBranches, + unsigned int localEntry) { // Parameter branch is always the last one auto& paramBranches = catInfo.branches.back(); + + // Make sure to have a valid branch pointer after switching trees in the chain + // 
as well as on the first event + if (reloadBranches) { + paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + } auto* branch = paramBranches.data; GenericParameters params; auto* emd = ¶ms; branch->SetAddress(&emd); - branch->GetEntry(catInfo.entry); + branch->GetEntry(localEntry); return params; } @@ -53,19 +60,29 @@ std::unique_ptr ROOTFrameReader::readEntry(ROOTFrameReader::Categ return nullptr; } + // After switching trees in the chain, branch pointers get invalidated so + // they need to be reassigned. + // NOTE: root 6.22/06 requires that we get completely new branches here, + // with 6.20/04 we could just re-set them + const auto preTreeNo = catInfo.chain->GetTreeNumber(); + const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); + const auto treeChange = catInfo.chain->GetTreeNumber() != preTreeNo; + // Also need to make sure to handle the first event + const auto reloadBranches = treeChange || localEntry == 0; + ROOTFrameData::BufferMap buffers; for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) { - buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i)); + buffers.emplace(catInfo.storedClasses[i].first, getCollectionBuffers(catInfo, i, reloadBranches, localEntry)); } - auto parameters = readEventMetaData(catInfo); + auto parameters = readEntryParameters(catInfo, reloadBranches, localEntry); catInfo.entry++; return std::make_unique(std::move(buffers), catInfo.table, std::move(parameters)); } -podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, - size_t iColl) { +podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameReader::CategoryInfo& catInfo, size_t iColl, + bool reloadBranches, unsigned int localEntry) { const auto& name = catInfo.storedClasses[iColl].first; const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].second; auto& branches = catInfo.branches[index]; 
@@ -76,12 +93,7 @@ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameRead // TODO: Error handling of empty optional auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); - const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); - // After switching trees in the chain, branch pointers get invalidated so - // they need to be reassigned. - // NOTE: root 6.22/06 requires that we get completely new branches here, - // with 6.20/04 we could just re-set them - if (localEntry == 0) { + if (reloadBranches) { branches.data = root_utils::getBranch(catInfo.chain.get(), name.c_str()); // reference collections diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 92263ea8e..234323219 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -64,7 +64,9 @@ set(root_dependent_tests read_timed.cpp read_frame.cpp write_frame_root.cpp - read_frame_legacy_root.cpp) + read_frame_legacy_root.cpp + read_frame_root_multiple.cpp + ) set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") @@ -168,6 +170,7 @@ set_property(TEST read_and_write PROPERTY DEPENDS write) set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) set_property(TEST read_frame PROPERTY DEPENDS write_frame_root) +set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 71812378a..f39cebcdb 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -21,6 +21,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read-multiple read-legacy-files read_frame_legacy_root + read_frame_root_multiple write_frame_root 
read_frame diff --git a/tests/read_frame_root_multiple.cpp b/tests/read_frame_root_multiple.cpp new file mode 100644 index 000000000..85dcd316b --- /dev/null +++ b/tests/read_frame_root_multiple.cpp @@ -0,0 +1,89 @@ +#include "read_frame.h" + +#include "podio/ROOTFrameReader.h" + +int read_frames(podio::ROOTFrameReader& reader) { + if (reader.currentFileVersion() != podio::version::build_version) { + std::cerr << "The podio build version could not be read back correctly. " + << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" + << std::endl; + return 1; + } + + if (reader.getEntries("events") != 20) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 20 << ", actual: " << reader.getEntries("events") << ")" << std::endl; + return 1; + } + + if (reader.getEntries("events") != reader.getEntries("other_events")) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 20 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; + return 1; + } + + // Read the frames in a different order than when writing them here to make + // sure that the writing/reading order does not impose any usage requirements + for (size_t i = 0; i < reader.getEntries("events"); ++i) { + auto frame = podio::Frame(reader.readNextEntry("events")); + if (frame.get("emptySubsetColl") == nullptr) { + std::cerr << "Could not retrieve an empty subset collection" << std::endl; + return 1; + } + if (frame.get("emptyCollection") == nullptr) { + std::cerr << "Could not retrieve an empty collection" << std::endl; + return 1; + } + + processEvent(frame, (i % 10), reader.currentFileVersion()); + + auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); + processEvent(otherFrame, (i % 10) + 100, reader.currentFileVersion()); + // The other_events category also holds external collections + processExtensions(otherFrame, (i % 10) + 100, 
reader.currentFileVersion()); + } + + if (reader.readNextEntry("events")) { + std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl; + return 1; + } + + std::cout << "========================================================\n" << std::endl; + if (reader.readNextEntry("not_present")) { + std::cerr << "Trying to read non-existant frame data should return a nullptr" << std::endl; + return 1; + } + + // Reading specific (jumping to) entry + { + auto frame = podio::Frame(reader.readEntry("events", 4)); + processEvent(frame, 4, reader.currentFileVersion()); + // Reading the next entry after jump, continues from after the jump + auto nextFrame = podio::Frame(reader.readNextEntry("events")); + processEvent(nextFrame, 5, reader.currentFileVersion()); + + // Jump over a file boundary and make sure that works + auto otherFrame = podio::Frame(reader.readEntry("other_events", 14)); + processEvent(otherFrame, 4 + 100, reader.currentFileVersion()); + processExtensions(otherFrame, 4 + 100, reader.currentFileVersion()); + + // Jumping back also works + auto previousFrame = podio::Frame(reader.readEntry("other_events", 2)); + processEvent(previousFrame, 2 + 100, reader.currentFileVersion()); + processExtensions(previousFrame, 2 + 100, reader.currentFileVersion()); + } + + // Trying to read a Frame that is not present returns a nullptr + if (reader.readEntry("events", 30)) { + std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl; + return 1; + } + + return 0; +} + +int main() { + auto reader = podio::ROOTFrameReader(); + reader.openFiles({"example_frame.root", "example_frame.root"}); + return read_frames(reader); +} From 1a9ca5b70678b6bda07f129d405f8ddf149a6a30 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 14:51:07 +0200 Subject: [PATCH 065/100] Release Notes for v00-16-04 --- doc/ReleaseNotes.md | 59 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 
insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index 829399724..9950a4e5c 100644 --- a/doc/ReleaseNotes.md +++ b/doc/ReleaseNotes.md @@ -1,3 +1,62 @@ +# v00-16-04 + +* 2023-05-23 tmadlener ([PR#417](https://github.com/AIDASoft/podio/pull/417)) + - Fix an issue with reading multiple files via the `ROOTFrameReader` ([#411](https://github.com/AIDASoft/podio/issues/411)) + - Add documentation for API of opening file(s) + - Add tests for reading multiple files + +* 2023-05-22 tmadlener ([PR#418](https://github.com/AIDASoft/podio/pull/418)) + - Bring back the public templated `getMap` functionality for `podio::GenericParameters` as they are already used in DD4hep (see [AIDASoft/DD4hep#1112](https://github.com/AIDASoft/DD4hep/pull/1112)). + - Mark the existing `getXYZMap` as deprecated but keep them for a brief transition period. + - These have been removed in [#415](https://github.com/AIDASoft/podio/pull/415). + +* 2023-05-19 jmcarcell ([PR#416](https://github.com/AIDASoft/podio/pull/416)) + - Remove selection rules for classes that don't exist anymore + +* 2023-05-15 jmcarcell ([PR#415](https://github.com/AIDASoft/podio/pull/415)) + - Remove the deprecated getters and setters from the generic parameters + +* 2023-05-15 jmcarcell ([PR#410](https://github.com/AIDASoft/podio/pull/410)) + - Remove the square that is run when cmake runs + +* 2023-05-09 tmadlener ([PR#414](https://github.com/AIDASoft/podio/pull/414)) + - Fix off-by-one error in `UserDataCollection::print` that caused the first element to be printed twice. + +* 2023-05-09 Thomas Madlener ([PR#394](https://github.com/AIDASoft/podio/pull/394)) + - Introduce a `CollectionBufferFactory` that can create the necessary buffers from a collection type, a schema version and a subset collection flag. 
+ - Use this factory throughout all existing Readers + - Remove `createBuffers` and `createSchemaEvolvableBuffers` from `podio::CollectionBase` interface + - Make the minimum allowed `schema_version` 1 in the yaml definition files. Default to 1 if no `schema_version` is provided + - Add a `schemaVersion` to the `DatamodelDefinition.h` header that is generated and that can be accessed via `{{ package_name }}::meta::schemaVersion`. Use this to propagate schema information to the necessary places. + - Make `SIOBlocks` write the current schema version, such that on reading they can generate the appropriate buffers for the version on file. + +* 2023-04-22 Christopher Dilks ([PR#408](https://github.com/AIDASoft/podio/pull/408)) + - fix type inconsistency between `Collection::size()` and index for const object accessors + +* 2023-04-21 jmcarcell ([PR#387](https://github.com/AIDASoft/podio/pull/387)) + - Make sure that the dump model round trip tests work without `ENABLE_SIO` + - Actually test the extension model dumping + +* 2023-04-12 Thomas Madlener ([PR#400](https://github.com/AIDASoft/podio/pull/400)) + - Fix a bug in `SIOFrameData::getAvailableCollections` to also work with Frames where some of the collections have not been written and that could lead to a seg fault. + - Add a test for this in c++ (previously only covered in python unittests of Frame). + +* 2023-04-05 Thomas Madlener ([PR#399](https://github.com/AIDASoft/podio/pull/399)) + - Add `PODIO_ENABLE_SIO=1` to the public `target_compile_definitions` for `podioSioIO` so that all dependent targets automatically get it as well. This should make it easier to use SIO dependent features in dependencies. + - Consistently use a scope for `target_link_libraries` in tests. + +* 2023-04-03 Paul Gessinger-Befurt ([PR#398](https://github.com/AIDASoft/podio/pull/398)) + - Do not reject building if ROOT was built with C++20 (instead of C++17). 
+ +* 2023-04-03 Thomas Madlener ([PR#397](https://github.com/AIDASoft/podio/pull/397)) + - Remove the `GENERATED` property from generated files in CMake to avoid inconsistent removal of headers and source files with the `clean` target. Fixes [#396](https://github.com/AIDASoft/podio/issues/396) + +* 2023-03-15 Benedikt Hegner ([PR#341](https://github.com/AIDASoft/podio/pull/341)) + - Adding infrastructure for schema evolution + - Added explicit version tracking to the metadata + - Data model comparison tool w/ simple heuristics to identify potential omissions / mistakes (e.g. checking for the limits of the ROOT backend) + - Changed handling of backwards compatibility for the collection info metadata + # v00-16-03 * 2023-03-14 jmcarcell ([PR#391](https://github.com/AIDASoft/podio/pull/391)) From 904e5725dadf012f0fa42a1934a3320c8a31b2d4 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 14:51:09 +0200 Subject: [PATCH 066/100] Updating version to v00-16-04 --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c4f3a96ac..e936aa3fb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,8 +7,8 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) -SET( ${PROJECT_NAME}_VERSION_MINOR 17 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 0 ) +SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 4 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From 0aa94e0e6792b13554323678b90ed0d6472e6576 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 17:30:45 +0200 Subject: [PATCH 067/100] Fix a version check inside the ROOTReader to avoid seg faults (#420) --- src/ROOTReader.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 
781c1aea1..d6df62408 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -177,7 +177,7 @@ void ROOTReader::openFiles(const std::vector& filenames) { const auto collectionInfo = root_utils::reconstructCollectionInfo(m_chain, *m_table); createCollectionBranches(collectionInfo); - } else if (m_fileVersion < podio::version::Version{0, 17, 0}) { + } else if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo"); auto collectionInfoWithoutSchema = new std::vector; From c2f99da4c7e83cb211e6cddfc524080961923aed Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 18:25:44 +0200 Subject: [PATCH 068/100] Release Notes for v00-16-05 --- doc/ReleaseNotes.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/ReleaseNotes.md b/doc/ReleaseNotes.md index 9950a4e5c..2da32ee2f 100644 --- a/doc/ReleaseNotes.md +++ b/doc/ReleaseNotes.md @@ -1,3 +1,8 @@ +# v00-16-05 + +* 2023-05-23 tmadlener ([PR#420](https://github.com/AIDASoft/podio/pull/420)) + - Fix a version check inside the `ROOTReader` to avoid segmentation violations + # v00-16-04 * 2023-05-23 tmadlener ([PR#417](https://github.com/AIDASoft/podio/pull/417)) From 76c98a6e76ec25d37e1e726fb5afa432d84e8420 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 23 May 2023 18:25:45 +0200 Subject: [PATCH 069/100] Updating version to v00-16-05 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e936aa3fb..700536436 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 4 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 5 ) SET( ${PROJECT_NAME}_VERSION 
"${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) From 4a767cf90086ef06ef8e27e425b1edb97c063e58 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 30 May 2023 16:25:44 +0200 Subject: [PATCH 070/100] Make retrieval of parameters more robust in Frame python bindings (#422) --- python/podio/frame.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/podio/frame.py b/python/podio/frame.py index 70418451c..4822b00df 100644 --- a/python/podio/frame.py +++ b/python/podio/frame.py @@ -136,9 +136,9 @@ def get_parameter(self, name, as_type=None): """ def _get_param_value(par_type, name): par_value = self._frame.getParameter[par_type](name) - if len(par_value) > 1: - return list(par_value) - return par_value[0] + if len(par_value) == 1: + return par_value[0] + return list(par_value) # This access already raises the KeyError if there is no such parameter par_type = self._param_key_types[name] From 8a3b2ff5b14fdd2882c0f85f158ea1c04d70d6e3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 5 Jun 2023 10:37:25 +0200 Subject: [PATCH 071/100] Add more tests for legacy files (#423) * Introduce version subdirectory for legacy tests * Rename executable and add more legacy versions for testing * Introduce macro to cleanup CMakeLists slightly * Rename frame reading test * Add frame reading tests for legacy files * Don't check build versions in legacy files * Fix reading of v00-16 files and adapt tests to expected contents * Add legacy file reading test to ignore list for sanitizer builds * Make sure to properly configure CTestCustom.cmake --- src/ROOTFrameReader.cc | 17 +++++++-- src/ROOTReader.cc | 2 +- src/rootUtils.h | 2 +- tests/CMakeLists.txt | 35 +++++++++++++------ tests/CTestCustom.cmake | 9 +++-- ...y-files.cpp => read-legacy-files-root.cpp} | 0 tests/read_frame.cpp | 9 ----- tests/read_frame.h | 32 ++++++++++------- tests/read_frame_auxiliary.h | 8 ++++- tests/read_frame_root.cpp | 
19 ++++++++++ tests/scripts/get_test_inputs.sh | 9 +++++ 11 files changed, 104 insertions(+), 38 deletions(-) rename tests/{read-legacy-files.cpp => read-legacy-files-root.cpp} (100%) delete mode 100644 tests/read_frame.cpp create mode 100644 tests/read_frame_root.cpp diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index 85c67b116..c1a7461d8 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -147,9 +147,22 @@ void ROOTFrameReader::initCategory(CategoryInfo& catInfo, const std::string& cat tableBranch->GetEntry(0); auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); + auto collInfo = new std::vector(); - collInfoBranch->SetAddress(&collInfo); - collInfoBranch->GetEntry(0); + if (m_fileVersion < podio::version::Version{0, 16, 4}) { + auto oldCollInfo = new std::vector(); + collInfoBranch->SetAddress(&oldCollInfo); + collInfoBranch->GetEntry(0); + collInfo->reserve(oldCollInfo->size()); + for (auto&& [collID, collType, isSubsetColl] : *oldCollInfo) { + // Manually set the schema version to 1 + collInfo->emplace_back(collID, std::move(collType), isSubsetColl, 1u); + } + delete oldCollInfo; + } else { + collInfoBranch->SetAddress(&collInfo); + collInfoBranch->GetEntry(0); + } std::tie(catInfo.branches, catInfo.storedClasses) = createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index d6df62408..57bf1500f 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -180,7 +180,7 @@ void ROOTReader::openFiles(const std::vector& filenames) { } else if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo"); - auto collectionInfoWithoutSchema = new std::vector; + auto collectionInfoWithoutSchema = new std::vector; auto collectionInfo = new std::vector; collInfoBranch->SetAddress(&collectionInfoWithoutSchema); metadatatree->GetEntry(0); diff --git 
a/src/rootUtils.h b/src/rootUtils.h index 2ad69389b..b4859bbf8 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -105,7 +105,7 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB // collection, and its schema version using CollectionInfoT = std::tuple; // for backwards compatibility -using CollectionInfoTWithoutSchema = std::tuple; +using CollectionInfoWithoutSchemaT = std::tuple; inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) { // Read all data diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 234323219..ae5b0f7f4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,7 +62,7 @@ set(root_dependent_tests read_and_write_associated.cpp write_timed.cpp read_timed.cpp - read_frame.cpp + read_frame_root.cpp write_frame_root.cpp read_frame_legacy_root.cpp read_frame_root_multiple.cpp @@ -97,14 +97,29 @@ endif() # legacy file read test if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) message(STATUS "Using test inputs stored in: " ${PODIO_TEST_INPUT_DATA_DIR}) - add_executable(read-legacy-files read-legacy-files.cpp) - target_link_libraries(read-legacy-files PRIVATE TestDataModel TestDataModelDict podio::podioRootIO) - add_test(NAME read-legacy-files COMMAND read-legacy-files ${PODIO_TEST_INPUT_DATA_DIR}/example.root) + add_executable(read-legacy-files-root read-legacy-files-root.cpp) + target_link_libraries(read-legacy-files-root PRIVATE TestDataModel TestDataModelDict podio::podioRootIO) + + # Add a legacy test case based on a base executable and a version for which an + # input file exists + macro(ADD_PODIO_LEGACY_TEST version base_test input_file) + add_test(NAME ${base_test}_${version} COMMAND ${base_test} ${PODIO_TEST_INPUT_DATA_DIR}/${version}/${input_file}) + set_property(TEST ${base_test}_${version} PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} + # Clear the ROOT_INCLUDE_PATH for the tests, to avoid 
potential conflicts + # with existing headers from other installations + ROOT_INCLUDE_PATH= + ) + endmacro() + + ADD_PODIO_LEGACY_TEST(v00-13 read-legacy-files-root example.root legacy_test_cases) + + set(legacy_versions v00-16 v00-16-05) + foreach(version IN LISTS legacy_versions) + ADD_PODIO_LEGACY_TEST(${version} read-legacy-files-root example.root legacy_test_cases) + ADD_PODIO_LEGACY_TEST(${version} read_frame_root example_frame.root legacy_test_cases) + endforeach() - set_property(TEST read-legacy-files PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} - ROOT_INCLUDE_PATH= - ) endif() CREATE_PODIO_TEST(ostream_operator.cpp "") @@ -169,7 +184,7 @@ set_property(TEST read-multiple PROPERTY DEPENDS write) set_property(TEST read_and_write PROPERTY DEPENDS write) set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) -set_property(TEST read_frame PROPERTY DEPENDS write_frame_root) +set_property(TEST read_frame_root PROPERTY DEPENDS write_frame_root) set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) @@ -203,7 +218,7 @@ if (TARGET write_sio) endif() # Customize CTest to potentially disable some of the tests with known problems -configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake) +configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY) find_package(Threads REQUIRED) add_executable(unittest unittest.cpp frame.cpp) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index f39cebcdb..f435ebc7c 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -19,12 +19,12 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_timed check_benchmark_outputs read-multiple - read-legacy-files + read-legacy-files-root_v00-13 read_frame_legacy_root 
read_frame_root_multiple write_frame_root - read_frame + read_frame_root write_sio read_sio @@ -60,6 +60,11 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ datamodel_def_store_roundtrip_sio_extension ) + foreach(version in @legacy_versions@) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE read-legacy-files-root_${version}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE read_frame_root_${version}) + endforeach() + # ostream_operator is working with Memory sanitizer (at least locally) if("@USE_SANITIZER@" MATCHES "Memory(WithOrigin)?") list(REMOVE_ITEM CTEST_CUSTOM_TESTS_IGNORE ostream_operator) diff --git a/tests/read-legacy-files.cpp b/tests/read-legacy-files-root.cpp similarity index 100% rename from tests/read-legacy-files.cpp rename to tests/read-legacy-files-root.cpp diff --git a/tests/read_frame.cpp b/tests/read_frame.cpp deleted file mode 100644 index 2b25b4cf6..000000000 --- a/tests/read_frame.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "read_frame.h" -#include "read_frame_auxiliary.h" - -#include "podio/ROOTFrameReader.h" - -int main() { - return read_frames("example_frame.root") + - test_frame_aux_info("example_frame.root"); -} diff --git a/tests/read_frame.h b/tests/read_frame.h index 76d4f0057..69a48e341 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -61,7 +61,7 @@ void processExtensions(const podio::Frame& event, int iEvent, podio::version::Ve } template -int read_frames(const std::string& filename) { +int read_frames(const std::string& filename, bool assertBuildVersion = true) { auto reader = ReaderT(); try { reader.openFile(filename); @@ -70,7 +70,7 @@ int read_frames(const std::string& filename) { return 1; } - if (reader.currentFileVersion() != podio::version::build_version) { + if (assertBuildVersion && reader.currentFileVersion() != podio::version::build_version) { std::cerr << "The podio build version could not be read back correctly. 
" << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" << std::endl; @@ -93,13 +93,15 @@ int read_frames(const std::string& filename) { // sure that the writing/reading order does not impose any usage requirements for (size_t i = 0; i < reader.getEntries("events"); ++i) { auto frame = podio::Frame(reader.readNextEntry("events")); - if (frame.get("emptySubsetColl") == nullptr) { - std::cerr << "Could not retrieve an empty subset collection" << std::endl; - return 1; - } - if (frame.get("emptyCollection") == nullptr) { - std::cerr << "Could not retrieve an empty collection" << std::endl; - return 1; + if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { + if (frame.get("emptySubsetColl") == nullptr) { + std::cerr << "Could not retrieve an empty subset collection" << std::endl; + return 1; + } + if (frame.get("emptyCollection") == nullptr) { + std::cerr << "Could not retrieve an empty collection" << std::endl; + return 1; + } } processEvent(frame, i, reader.currentFileVersion()); @@ -107,7 +109,9 @@ int read_frames(const std::string& filename) { auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); processEvent(otherFrame, i + 100, reader.currentFileVersion()); // The other_events category also holds external collections - processExtensions(otherFrame, i + 100, reader.currentFileVersion()); + if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { + processExtensions(otherFrame, i + 100, reader.currentFileVersion()); + } } if (reader.readNextEntry("events")) { @@ -131,12 +135,16 @@ int read_frames(const std::string& filename) { auto otherFrame = podio::Frame(reader.readEntry("other_events", 4)); processEvent(otherFrame, 4 + 100, reader.currentFileVersion()); - processExtensions(otherFrame, 4 + 100, reader.currentFileVersion()); + if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { + processExtensions(otherFrame, 4 + 100, reader.currentFileVersion()); + 
} // Jumping back also works auto previousFrame = podio::Frame(reader.readEntry("other_events", 2)); processEvent(previousFrame, 2 + 100, reader.currentFileVersion()); - processExtensions(previousFrame, 2 + 100, reader.currentFileVersion()); + if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { + processExtensions(previousFrame, 2 + 100, reader.currentFileVersion()); + } // Trying to read a Frame that is not present returns a nullptr if (reader.readEntry("events", 10)) { diff --git a/tests/read_frame_auxiliary.h b/tests/read_frame_auxiliary.h index 0546d6cd0..66473e2c3 100644 --- a/tests/read_frame_auxiliary.h +++ b/tests/read_frame_auxiliary.h @@ -4,6 +4,7 @@ #include "write_frame.h" #include "podio/Frame.h" +#include "podio/podioVersion.h" #include #include @@ -59,7 +60,12 @@ int test_frame_aux_info(const std::string& fileName) { // written auto event = podio::Frame(reader.readEntry("events", 0)); - return testGetAvailableCollections(event, collsToWrite); + auto collsToRead = collsToWrite; + if (reader.currentFileVersion() < podio::version::Version{0, 16, 3}) { + collsToRead.erase(collsToRead.end() - 2, collsToRead.end()); + } + + return testGetAvailableCollections(event, collsToRead); } #endif // PODIO_TESTS_READ_FRAME_AUXILIARY_H diff --git a/tests/read_frame_root.cpp b/tests/read_frame_root.cpp new file mode 100644 index 000000000..b687fa83e --- /dev/null +++ b/tests/read_frame_root.cpp @@ -0,0 +1,19 @@ +#include "read_frame.h" +#include "read_frame_auxiliary.h" + +#include "podio/ROOTFrameReader.h" + +#include +#include + +int main(int argc, char* argv[]) { + std::string inputFile = "example_frame.root"; + bool assertBuildVersion = true; + if (argc == 2) { + inputFile = argv[1]; + assertBuildVersion = false; + } + + return read_frames(inputFile, assertBuildVersion) + + test_frame_aux_info(inputFile); +} diff --git a/tests/scripts/get_test_inputs.sh b/tests/scripts/get_test_inputs.sh index f1c68be73..c596ca3f7 100644 --- 
a/tests/scripts/get_test_inputs.sh +++ b/tests/scripts/get_test_inputs.sh @@ -8,7 +8,16 @@ export PODIO_TEST_INPUT_DATA_DIR # Get a legacy file with the oldest version that we still support cd ${PODIO_TEST_INPUT_DATA_DIR} +mkdir v00-13 && cd v00-13 wget https://key4hep.web.cern.ch:443/testFiles/podio/v00-13/example.root > /dev/null 2>&1 +for version in v00-16 v00-16-05; do + cd ${PODIO_TEST_INPUT_DATA_DIR} + mkdir ${version} && cd ${version} + for fileName in example.root example_frame.root; do + wget https://key4hep.web.cern.ch:443/testFiles/podio/${version}/${fileName} > /dev/null 2>&1 + done +done + # Announce where we store variables to the outside echo -n ${PODIO_TEST_INPUT_DATA_DIR} From bcc837078628e053887e74f2e3276a935e1bff9f Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 5 Jun 2023 12:28:17 +0200 Subject: [PATCH 072/100] Make branch names for relations more legible for ROOT based I/O (#405) * Add RelationNames and code gen to populate it * Write branches with proper names in ROOTFrameWriter * Make the Frame reader use the registry to get branch names * Make sure that the EDM always is in the registry * Make sure that the names are populated when they are used * Make relation branch names start with an underscore Less prone to accidental collisions. 
Plus sort of conveying that they are "hidden" * Make it possible to read index based files as well - Keep branch names around for resetting them (since invalidated branches can no longer provide that information) * Fix legacy reading * Manually increase patch to make if clause work properly --- CMakeLists.txt | 2 +- include/podio/CollectionBase.h | 2 + include/podio/CollectionBranches.h | 3 + include/podio/CollectionBuffers.h | 1 + include/podio/DatamodelRegistry.h | 40 +++++- python/podio_class_generator.py | 6 + python/templates/Collection.h.jinja2 | 4 + python/templates/DatamodelDefinition.h.jinja2 | 42 ++++++- src/DatamodelRegistry.cc | 26 +++- src/ROOTFrameReader.cc | 114 ++++++++++++------ src/ROOTFrameWriter.cc | 40 +++--- src/rootUtils.h | 30 +++++ 12 files changed, 249 insertions(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 700536436..577fac281 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 5 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 6 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index 7e2a51b2c..45f179e96 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -14,6 +14,8 @@ namespace podio { // forward declarations class ICollectionProvider; +struct RelationNames; + class CollectionBase { protected: /// default constructor diff --git a/include/podio/CollectionBranches.h b/include/podio/CollectionBranches.h index ddbce7ec2..9b20016aa 100644 --- a/include/podio/CollectionBranches.h +++ b/include/podio/CollectionBranches.h @@ -3,6 +3,7 @@ #include "TBranch.h" +#include #include namespace podio::root_utils { @@ -15,6 
+16,8 @@ struct CollectionBranches { TBranch* data{nullptr}; std::vector refs{}; std::vector vecs{}; + std::vector refNames{}; ///< The names of the relation branches + std::vector vecNames{}; ///< The names of the vector member branches }; } // namespace podio::root_utils diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 80b94c6dd..8846af162 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/include/podio/DatamodelRegistry.h b/include/podio/DatamodelRegistry.h index a32aa8218..77f31192b 100644 --- a/include/podio/DatamodelRegistry.h +++ b/include/podio/DatamodelRegistry.h @@ -3,11 +3,39 @@ #include #include +#include +#include #include #include namespace podio { +/** + * Type alias for storing the names of all Relations and VectorMembers for all + * datatypes of an EDM. Populated for each EDM at code generation time. + * The structure is of each element in the outer vector is: + * - get<0>: The name of the datatype + * - get<1>: The names of all Relations, where OneToManyRelations comes before + * OneToOneRelations (in the order as they appear in the YAML file) + * - get<2>: The names of all VectorMembers (in the order of the file YAML) + */ +using RelationNameMapping = + std::vector, std::vector>>; + +/** + * Information on the names of the OneTo[One|Many]Relations as well as the + * VectorMembers of a datatype + * + * The contents are populated by the code generation, where we simply generate + * static vectors that we make available as const& here. + */ +struct RelationNames { + /// The names of the relations (OneToMany before OneToOne) + const std::vector& relations; + /// The names of the vector members + const std::vector& vectorMembers; +}; + /** * Global registry holding information about datamodels and datatypes defined * therein that are currently known by podio (i.e. 
which have been dynamically @@ -85,14 +113,24 @@ class DatamodelRegistry { * @param name The name of the EDM that should be registered * @param definition The datamodel definition from which this EDM has been * generated in JSON format + * @param relationNames the names of the relations and vector members for all + * datatypes that are defined for this EDM * */ - size_t registerDatamodel(std::string name, std::string_view definition); + size_t registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames); + + /** + * Get the names of the relations and vector members of a datatype + */ + RelationNames getRelationNames(std::string_view typeName) const; private: DatamodelRegistry() = default; /// The stored definitions std::vector> m_definitions{}; + + std::unordered_map m_relations{}; }; } // namespace podio diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 0b37d706c..4f86f2a45 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -410,8 +410,14 @@ def _write_edm_def_file(self): 'edm_definition': model_encoder.encode(self.datamodel), 'incfolder': self.incfolder, 'schema_version': self.datamodel.schema_version, + 'datatypes': self.datamodel.datatypes, } + def quoted_sv(string): + return f"\"{string}\"sv" + + self.env.filters["quoted_sv"] = quoted_sv + self._write_file('DatamodelDefinition.h', self._eval_template('DatamodelDefinition.h.jinja2', data)) diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index f91c02db5..b70e93b12 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -32,6 +32,10 @@ #include #include +namespace podio { + struct RelationNames; +} + {{ utils.namespace_open(class.namespace) }} diff --git a/python/templates/DatamodelDefinition.h.jinja2 b/python/templates/DatamodelDefinition.h.jinja2 index c424ded7f..ce4cefb41 100644 --- 
a/python/templates/DatamodelDefinition.h.jinja2 +++ b/python/templates/DatamodelDefinition.h.jinja2 @@ -9,6 +9,27 @@ namespace {{ package_name }}::meta { */ static constexpr auto {{ package_name }}__JSONDefinition = R"DATAMODELDEF({{ edm_definition }})DATAMODELDEF"; + +/** + * The names of all relations and vector members for all datatypes + */ +inline podio::RelationNameMapping {{ package_name }}__getRelationNames() { + using namespace std::string_view_literals; + return { +{% for typeName, type in datatypes.items() %} + {"{{ typeName }}"sv, + { {{ (type.OneToManyRelations + type.OneToOneRelations) | map(attribute="name") | map("quoted_sv") | join(", ") }} }, + { {{ type.VectorMembers | map(attribute="name") | map("quoted_sv") | join(", ")}} }, + }, +{% endfor %} + }; +} + +/** + * The schema version at generation time + */ +static constexpr podio::SchemaVersionT schemaVersion = {{ schema_version }}; + /** * The helper class that takes care of registering the datamodel definition to * the DatamodelRegistry and to provide the index in that registry. 
@@ -19,18 +40,27 @@ static constexpr auto {{ package_name }}__JSONDefinition = R"DATAMODELDEF({{ edm class DatamodelRegistryIndex { public: static size_t value() { - static auto index = DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel("{{ package_name }}", {{ package_name }}__JSONDefinition)); + static const auto relationNames = {{ package_name }}__getRelationNames(); + static auto index = DatamodelRegistryIndex(podio::DatamodelRegistry::mutInstance().registerDatamodel("{{ package_name }}", {{ package_name }}__JSONDefinition, relationNames)); return index.m_value; } - private: DatamodelRegistryIndex(size_t v) : m_value(v) {} size_t m_value{podio::DatamodelRegistry::NoDefinitionAvailable}; }; -/** - * The schema version at generation time - */ -static constexpr podio::SchemaVersionT schemaVersion = {{ schema_version }}; + +namespace static_registration { + // The usual trick via an IIFE and a const variable that we assign to, to + // ensure that we populate this before everything starts + inline bool ensureRegistration() { + const static auto reg = []() { + return {{ package_name }}::meta::DatamodelRegistryIndex::value() != podio::DatamodelRegistry::NoDefinitionAvailable; + }(); + return reg; + } + + const auto registrationEnsured = ensureRegistration(); +} } // namespace {{ package_name }}::meta diff --git a/src/DatamodelRegistry.cc b/src/DatamodelRegistry.cc index d5a96e364..16e387132 100644 --- a/src/DatamodelRegistry.cc +++ b/src/DatamodelRegistry.cc @@ -15,13 +15,19 @@ DatamodelRegistry& DatamodelRegistry::mutInstance() { return registryInstance; } -size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view definition) { +size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view definition, + const podio::RelationNameMapping& relationNames) { const auto it = std::find_if(m_definitions.cbegin(), m_definitions.cend(), [&name](const auto& kvPair) { return kvPair.first == name; }); if (it == 
m_definitions.cend()) { int index = m_definitions.size(); m_definitions.emplace_back(name, definition); + + for (const auto& [typeName, relations, vectorMembers] : relationNames) { + m_relations.emplace(typeName, RelationNames{relations, vectorMembers}); + } + return index; } @@ -60,4 +66,22 @@ const std::string& DatamodelRegistry::getDatamodelName(size_t index) const { return m_definitions[index].first; } +RelationNames DatamodelRegistry::getRelationNames(std::string_view typeName) const { + static std::vector emptyVec{}; + if (typeName.substr(0, 24) == "podio::UserDataCollection") { + return {emptyVec, emptyVec}; + } + + // Strip Collection if necessary + if (typeName.size() > 10 && typeName.substr(typeName.size() - 10) == "Collection") { + typeName = typeName.substr(0, typeName.size() - 10); + } + + if (const auto it = m_relations.find(typeName); it != m_relations.end()) { + return it->second; + } + + return {emptyVec, emptyVec}; +} + } // namespace podio diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index c1a7461d8..d7da9dfa9 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -3,6 +3,7 @@ #include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" +#include "podio/DatamodelRegistry.h" #include "podio/GenericParameters.h" #include "rootUtils.h" @@ -22,6 +23,10 @@ std::tuple, std::vector& collInfo); +std::tuple, std::vector>> +createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, + const std::vector& collInfo); + GenericParameters ROOTFrameReader::readEntryParameters(ROOTFrameReader::CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry) { // Parameter branch is always the last one @@ -94,23 +99,7 @@ podio::CollectionReadBuffers ROOTFrameReader::getCollectionBuffers(ROOTFrameRead auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (reloadBranches) { - branches.data = 
root_utils::getBranch(catInfo.chain.get(), name.c_str()); - - // reference collections - if (auto* refCollections = collBuffers.references) { - for (size_t i = 0; i < refCollections->size(); ++i) { - const auto brName = root_utils::refBranch(name, i); - branches.refs[i] = root_utils::getBranch(catInfo.chain.get(), brName.c_str()); - } - } - - // vector members - if (auto* vecMembers = collBuffers.vectorMembers) { - for (size_t i = 0; i < vecMembers->size(); ++i) { - const auto brName = root_utils::vecBranch(name, i); - branches.vecs[i] = root_utils::getBranch(catInfo.chain.get(), brName.c_str()); - } - } + root_utils::resetBranches(catInfo.chain.get(), branches, name); } // set the addresses and read the data @@ -164,8 +153,15 @@ void ROOTFrameReader::initCategory(CategoryInfo& catInfo, const std::string& cat collInfoBranch->GetEntry(0); } - std::tie(catInfo.branches, catInfo.storedClasses) = - createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); + // For backwards compatibility make it possible to read the index based files + // from older versions + if (m_fileVersion <= podio::version::Version{0, 16, 5}) { + std::tie(catInfo.branches, catInfo.storedClasses) = + createCollectionBranchesIndexBased(catInfo.chain.get(), *catInfo.table, *collInfo); + } else { + std::tie(catInfo.branches, catInfo.storedClasses) = + createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); + } delete collInfo; @@ -255,8 +251,8 @@ std::vector ROOTFrameReader::getAvailableCategories() const { } std::tuple, std::vector>> -createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { +createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; @@ -269,34 +265,84 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, // to check here const auto name = 
idTable.name(collID); - root_utils::CollectionBranches branches{}; const auto collectionClass = TClass::GetClass(collType.c_str()); - // Need the collection here to setup all the branches. Have to manage the // temporary collection ourselves auto collection = std::unique_ptr(static_cast(collectionClass->New())); - collection->setSubsetCollection(isSubsetColl); - - if (!isSubsetColl) { + root_utils::CollectionBranches branches{}; + if (isSubsetColl) { + // Only one branch will exist and we can trivially get its name + auto brName = root_utils::refBranch(name, 0); + branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refNames.emplace_back(std::move(brName)); + } else { // This branch is guaranteed to exist since only collections that are // also written to file are in the info metadata that we work with here branches.data = root_utils::getBranch(chain, name.c_str()); + + const auto buffers = collection->getBuffers(); + for (size_t i = 0; i < buffers.references->size(); ++i) { + auto brName = root_utils::refBranch(name, i); + branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refNames.emplace_back(std::move(brName)); + } + + for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) { + auto brName = root_utils::vecBranch(name, i); + branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.vecNames.emplace_back(std::move(brName)); + } } - const auto buffers = collection->getBuffers(); - for (size_t i = 0; i < buffers.references->size(); ++i) { - const auto brName = root_utils::refBranch(name, i); + storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); + collBranches.emplace_back(std::move(branches)); + } + + return {collBranches, storedClasses}; +} + +std::tuple, std::vector>> +createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, + const std::vector& collInfo) { + + size_t collectionIndex{0}; + 
std::vector collBranches; + collBranches.reserve(collInfo.size() + 1); + std::vector> storedClasses; + storedClasses.reserve(collInfo.size()); + + for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { + // We only write collections that are in the collectionIDTable, so no need + // to check here + const auto name = idTable.name(collID); + + root_utils::CollectionBranches branches{}; + if (isSubsetColl) { + // Only one branch will exist and we can trivially get its name + auto brName = root_utils::subsetBranch(name); branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); - } + branches.refNames.emplace_back(std::move(brName)); + } else { + // This branch is guaranteed to exist since only collections that are + // also written to file are in the info metadata that we work with here + branches.data = root_utils::getBranch(chain, name.c_str()); - for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) { - const auto brName = root_utils::vecBranch(name, i); - branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType); + for (const auto& relName : relVecNames.relations) { + auto brName = root_utils::refBranch(name, relName); + branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refNames.emplace_back(std::move(brName)); + } + for (const auto& vecName : relVecNames.vectorMembers) { + auto brName = root_utils::refBranch(name, vecName); + branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.vecNames.emplace_back(std::move(brName)); + } } storedClasses.emplace_back(name, std::make_tuple(collType, isSubsetColl, collSchemaVersion, collectionIndex++)); - collBranches.push_back(branches); + collBranches.emplace_back(std::move(branches)); } return {collBranches, storedClasses}; diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index e6fa85de6..ce9261b60 100644 --- 
a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -1,5 +1,6 @@ #include "podio/ROOTFrameWriter.h" #include "podio/CollectionBase.h" +#include "podio/DatamodelRegistry.h" #include "podio/Frame.h" #include "podio/GenericParameters.h" #include "podio/podioVersion.h" @@ -68,29 +69,32 @@ void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetBuffers(); - - // data buffer branch, only for non-subset collections - if (buffers.data) { + // For subset collections we only fill one references branch + if (coll->isSubsetCollection()) { + auto& refColl = (*buffers.references)[0]; + const auto brName = root_utils::subsetBranch(name); + branches.refs.push_back(catInfo.tree->Branch(brName.c_str(), refColl.get())); + } else { + // For "proper" collections we populate all branches, starting with the data auto bufferDataType = "vector<" + coll->getDataTypeName() + ">"; branches.data = catInfo.tree->Branch(name.c_str(), bufferDataType.c_str(), buffers.data); - } - // reference collections - if (auto refColls = buffers.references) { - int i = 0; - for (auto& c : (*refColls)) { - const auto brName = root_utils::refBranch(name, i++); - branches.refs.push_back(catInfo.tree->Branch(brName.c_str(), c.get())); + const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(coll->getValueTypeName()); + if (auto refColls = buffers.references) { + int i = 0; + for (auto& c : (*refColls)) { + const auto brName = root_utils::refBranch(name, relVecNames.relations[i++]); + branches.refs.push_back(catInfo.tree->Branch(brName.c_str(), c.get())); + } } - } - // vector members - if (auto vmInfo = buffers.vectorMembers) { - int i = 0; - for (auto& [type, vec] : (*vmInfo)) { - const auto typeName = "vector<" + type + ">"; - const auto brName = root_utils::vecBranch(name, i++); - branches.vecs.push_back(catInfo.tree->Branch(brName.c_str(), typeName.c_str(), vec)); + if (auto vmInfo = buffers.vectorMembers) { + int i = 0; + for (auto& [type, vec] : (*vmInfo)) 
{ + const auto typeName = "vector<" + type + ">"; + const auto brName = root_utils::vecBranch(name, relVecNames.vectorMembers[i++]); + branches.vecs.push_back(catInfo.tree->Branch(brName.c_str(), typeName.c_str(), vec)); + } } } diff --git a/src/rootUtils.h b/src/rootUtils.h index b4859bbf8..d6072fa09 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -76,10 +76,40 @@ inline std::string refBranch(const std::string& name, size_t index) { return name + "#" + std::to_string(index); } +inline std::string refBranch(const std::string& name, std::string_view relName) { + return "_" + name + "_" + std::string(relName); +} + inline std::string vecBranch(const std::string& name, size_t index) { return name + "_" + std::to_string(index); } +inline std::string vecBranch(const std::string& name, std::string_view vecName) { + return "_" + name + "_" + std::string(vecName); +} + +/// The name for subset branches +inline std::string subsetBranch(const std::string& name) { + return name + "_objIdx"; +} + +/** + * Reset all the branches that by getting them from the TTree again + */ +inline void resetBranches(TTree* chain, CollectionBranches& branches, const std::string& name) { + if (branches.data) { + branches.data = getBranch(chain, name); + } + + for (size_t i = 0; i < branches.refs.size(); ++i) { + branches.refs[i] = getBranch(chain, branches.refNames[i]); + } + + for (size_t i = 0; i < branches.vecs.size(); ++i) { + branches.vecs[i] = getBranch(chain, branches.vecNames[i]); + } +} + template inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionBranches& branches) { From 69793032abdd74049df6ebd2c6deb198b12f57c2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 5 Jun 2023 14:47:27 +0200 Subject: [PATCH 073/100] Sort the branches alphabetically when writing Frames (#421) --- src/ROOTFrameWriter.cc | 2 +- src/rootUtils.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/ROOTFrameWriter.cc 
b/src/ROOTFrameWriter.cc index ce9261b60..5c1cc195d 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -26,7 +26,7 @@ void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& c // been initialized if (catInfo.tree == nullptr) { catInfo.idTable = frame.getCollectionIDTableForWrite(); - catInfo.collsToWrite = collsToWrite; + catInfo.collsToWrite = root_utils::sortAlphabeticaly(collsToWrite); catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str()); catInfo.tree->SetDirectory(m_file.get()); } diff --git a/src/rootUtils.h b/src/rootUtils.h index d6072fa09..7c6311c83 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -11,6 +11,8 @@ #include "TClass.h" #include "TTree.h" +#include +#include #include #include #include @@ -182,6 +184,25 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable return collInfo; } +/** + * Sort the input vector of strings alphabetically, case insensitive. + */ +inline std::vector sortAlphabeticaly(std::vector strings) { + // Obviously there is no tolower(std::string) in c++, so this is slightly more + // involved and we make use of the fact that lexicographical_compare works on + // ranges and the fact that we can feed it a dedicated comparison function, + // where we convert the strings to lower case char-by-char. The alternative is + // to make string copies inside the first lambda, transform them to lowercase + // and then use operator< of std::string, which would be effectively + // hand-writing what is happening below. 
+ std::sort(strings.begin(), strings.end(), [](const auto& lhs, const auto& rhs) { + return std::lexicographical_compare( + lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), + [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); + }); + return strings; +} + } // namespace podio::root_utils #endif From e7e70c65df025bed91ce6c2b31c485b34d5477ab Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 8 Jun 2023 10:28:27 +0200 Subject: [PATCH 074/100] Make Catch2 setup more robust against podios in environment (#425) * Make unittest environment slightly more robust * Make catch test discovery skippable * Specifically use python3 for python unittests --- tests/CMakeLists.txt | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ae5b0f7f4..344d910db 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -204,7 +204,7 @@ if (TARGET read_sio) set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) endif() -add_test( NAME pyunittest COMMAND python -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) +add_test( NAME pyunittest COMMAND python3 -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) set_property(TEST pyunittest PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} @@ -240,18 +240,20 @@ if (NOT FORCE_RUN_ALL_TESTS) endif() endif() -if (USE_SANITIZER MATCHES "Memory(WithOrigin)?") +option(SKIP_CATCH_DISCOVERY "Skip the Catch2 test discovery" OFF) + +if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" OR SKIP_CATCH_DISCOVERY) # Automatic test discovery fails with Memory sanitizers due to some issues in # Catch2. So in that case we skip the discovery step and simply run the thing # directly in the tests. 
- if (FORCE_RUN_ALL_TESTS) + if (FORCE_RUN_ALL_TESTS OR SKIP_CATCH_DISCOVERY) # Unfortunately Memory sanitizer seems to be really unhappy with Catch2 and # it fails to succesfully launch the executable and execute any test. Here # we just include them in order to have them show up as failing add_test(NAME unittest COMMAND unittest ${filter_tests}) set_property(TEST unittest PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} ) endif() else() @@ -262,7 +264,7 @@ else() TEST_SPEC ${filter_tests} # discover only tests that are known to not fail PROPERTIES ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} ) endif() From ac086e004bdb6f7d8972a1f62276504bb21370e0 Mon Sep 17 00:00:00 2001 From: hegner Date: Thu, 8 Jun 2023 13:12:07 +0200 Subject: [PATCH 075/100] Make CollectionIDs a 32bit hash value of the collection name (#412) * add hashing feature to CollectionID table * move collectionID from mix of int/unsigned to consistent uint32_t * fix collectionID in SIOBlock * use 32 bit hash; use murmurhash3 * Ignore tests in UB sanitizer runs * Add standalone executable for collision detection * protect frame against double insert --------- Co-authored-by: tmadlener --- include/podio/CollectionBase.h | 4 +- include/podio/CollectionIDTable.h | 15 +- include/podio/EventStore.h | 12 +- include/podio/Frame.h | 11 +- include/podio/ICollectionProvider.h | 4 +- include/podio/IMetaDataProvider.h | 2 +- include/podio/ObjectID.h | 4 +- include/podio/ROOTFrameWriter.h | 2 +- include/podio/ROOTLegacyReader.h | 2 +- include/podio/ROOTReader.h | 2 +- include/podio/SIOBlock.h | 6 +- include/podio/UserDataCollection.h | 6 +- python/templates/Collection.h.jinja2 | 10 +- 
python/templates/CollectionData.cc.jinja2 | 2 +- python/templates/CollectionData.h.jinja2 | 2 +- python/templates/Obj.cc.jinja2 | 4 +- python/templates/macros/collections.jinja2 | 2 +- .../templates/macros/implementations.jinja2 | 2 +- src/CMakeLists.txt | 8 + src/CollectionIDTable.cc | 16 +- src/EventStore.cc | 18 +- src/MurmurHash3.cpp | 442 ++++++++++++++++++ src/MurmurHash3.h | 37 ++ src/rootUtils.h | 2 +- src/selection.xml | 3 + src/sioUtils.h | 2 +- src/test_hashes.cpp | 126 +++++ tests/CMakeLists.txt | 2 + tests/frame.cpp | 12 + tests/unittest.cpp | 2 +- 30 files changed, 691 insertions(+), 71 deletions(-) create mode 100644 src/MurmurHash3.cpp create mode 100644 src/MurmurHash3.h create mode 100644 src/test_hashes.cpp diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index 45f179e96..d502c124b 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -41,10 +41,10 @@ class CollectionBase { virtual bool setReferences(const ICollectionProvider* collectionProvider) = 0; /// set collection ID - virtual void setID(unsigned id) = 0; + virtual void setID(uint32_t id) = 0; /// get collection ID - virtual unsigned getID() const = 0; + virtual uint32_t getID() const = 0; /// Get the collection buffers for this collection virtual podio::CollectionWriteBuffers getBuffers() = 0; diff --git a/include/podio/CollectionIDTable.h b/include/podio/CollectionIDTable.h index 47d51251b..39b947c2e 100644 --- a/include/podio/CollectionIDTable.h +++ b/include/podio/CollectionIDTable.h @@ -1,6 +1,7 @@ #ifndef PODIO_COLLECTIONIDTABLE_H #define PODIO_COLLECTIONIDTABLE_H +#include #include #include #include @@ -20,15 +21,15 @@ class CollectionIDTable { CollectionIDTable& operator=(CollectionIDTable&&) = default; /// constructor from existing ID:name mapping - CollectionIDTable(std::vector&& ids, std::vector&& names); + CollectionIDTable(std::vector&& ids, std::vector&& names); - CollectionIDTable(const std::vector& ids, const 
std::vector& names); + CollectionIDTable(const std::vector& ids, const std::vector& names); /// return collection ID for given name - int collectionID(const std::string& name) const; + uint32_t collectionID(const std::string& name) const; /// return name for given collection ID - const std::string name(int collectionID) const; + const std::string name(uint32_t collectionID) const; /// Check if collection name is known bool present(const std::string& name) const; @@ -39,13 +40,13 @@ class CollectionIDTable { }; /// return the ids - const std::vector& ids() const { + const std::vector& ids() const { return m_collectionIDs; } /// register new name to the table /// returns assigned collection ID - int add(const std::string& name); + uint32_t add(const std::string& name); /// Prints collection information void print() const; @@ -56,7 +57,7 @@ class CollectionIDTable { } private: - std::vector m_collectionIDs{}; + std::vector m_collectionIDs{}; std::vector m_names{}; mutable std::unique_ptr m_mutex{nullptr}; }; diff --git a/include/podio/EventStore.h b/include/podio/EventStore.h index f8ee70dc4..05ae58b38 100644 --- a/include/podio/EventStore.h +++ b/include/podio/EventStore.h @@ -58,13 +58,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro template bool get(const std::string& name, const T*& collection); - /// fast access to cached collections - CollectionBase* getFast(int id) const { - return (m_cachedCollections.size() > (unsigned)id ? m_cachedCollections[id] : nullptr); - } - /// access a collection by ID. 
returns true if successful - bool get(int id, CollectionBase*& coll) const final; + bool get(uint32_t id, CollectionBase*& coll) const final; /// access a collection by name /// returns a collection w/ setting isValid to true if successful @@ -96,7 +91,7 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro GenericParameters& getRunMetaData(int runID) override; /// return the collection meta data for the given colID - GenericParameters& getCollectionMetaData(int colID) override; + GenericParameters& getCollectionMetaData(uint32_t colID) override; RunMDMap* getRunMetaDataMap() { return &m_runMDMap; @@ -118,9 +113,8 @@ class DEPR_EVTSTORE EventStore : public ICollectionProvider, public IMetaDataPro } // members - mutable std::set m_retrievedIDs{}; + mutable std::set m_retrievedIDs{}; mutable CollContainer m_collections{}; - mutable std::vector m_cachedCollections{}; IReader* m_reader{nullptr}; std::shared_ptr m_table; diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 4943c4160..56ffe9cc2 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -120,7 +121,7 @@ class Frame { return *m_parameters; }; - bool get(int collectionID, podio::CollectionBase*& collection) const override; + bool get(uint32_t collectionID, podio::CollectionBase*& collection) const override; podio::CollectionIDTable getIDTable() const override { // Make a copy @@ -140,8 +141,8 @@ class Frame { mutable std::unique_ptr m_dataMtx{nullptr}; ///< The mutex for guarding the raw data podio::CollectionIDTable m_idTable{}; ///< The collection ID table std::unique_ptr m_parameters{nullptr}; ///< The generic parameter store for this frame - mutable std::set m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not yet put - ///< into the map) + mutable std::set m_retrievedIDs{}; ///< The IDs of the collections that we have already read (but not 
yet + ///< put into the map) }; std::unique_ptr m_self; ///< The internal concept pointer through which all the work is done @@ -386,7 +387,7 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n } template -bool Frame::FrameModel::get(int collectionID, CollectionBase*& collection) const { +bool Frame::FrameModel::get(uint32_t collectionID, CollectionBase*& collection) const { const auto& name = m_idTable.name(collectionID); const auto& [_, inserted] = m_retrievedIDs.insert(collectionID); @@ -420,6 +421,8 @@ const podio::CollectionBase* Frame::FrameModel::put(std::unique_ptr< // collisions from collections that are potentially present from rawdata? it->second->setID(m_idTable.add(name)); return it->second.get(); + } else { + throw std::invalid_argument("An object with key " + name + " already exists in the frame"); } } diff --git a/include/podio/ICollectionProvider.h b/include/podio/ICollectionProvider.h index 1d5bfc53f..3a724f26a 100644 --- a/include/podio/ICollectionProvider.h +++ b/include/podio/ICollectionProvider.h @@ -1,6 +1,8 @@ #ifndef PODIO_ICOLLECTIONPROVIDER_H #define PODIO_ICOLLECTIONPROVIDER_H +#include + namespace podio { class CollectionBase; @@ -10,7 +12,7 @@ class ICollectionProvider { /// destructor virtual ~ICollectionProvider() = default; /// access a collection by ID. 
returns true if successful - virtual bool get(int collectionID, CollectionBase*& collection) const = 0; + virtual bool get(uint32_t collectionID, CollectionBase*& collection) const = 0; }; } // namespace podio diff --git a/include/podio/IMetaDataProvider.h b/include/podio/IMetaDataProvider.h index 20f1941b0..b8c662277 100644 --- a/include/podio/IMetaDataProvider.h +++ b/include/podio/IMetaDataProvider.h @@ -23,7 +23,7 @@ class DEPR_EVTSTORE IMetaDataProvider { virtual GenericParameters& getRunMetaData(int runID) = 0; /// return the collection meta data for the given colID - virtual GenericParameters& getCollectionMetaData(int colID) = 0; + virtual GenericParameters& getCollectionMetaData(uint32_t colID) = 0; }; } // namespace podio diff --git a/include/podio/ObjectID.h b/include/podio/ObjectID.h index fc6037c47..4347a5ba9 100644 --- a/include/podio/ObjectID.h +++ b/include/podio/ObjectID.h @@ -1,6 +1,8 @@ #ifndef PODIO_OBJECTID_H #define PODIO_OBJECTID_H +#include + namespace podio { class ObjectID { @@ -9,7 +11,7 @@ class ObjectID { /// index of object in collection int index; /// ID of the collection - int collectionID; + uint32_t collectionID; /// not part of a collection static const int untracked = -1; diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h index 3b0fde4ba..535b84025 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -54,7 +54,7 @@ class ROOTFrameWriter { // collectionID, collectionType, subsetCollection // NOTE: same as in rootUtils.h private header! 
- using CollectionInfoT = std::tuple; + using CollectionInfoT = std::tuple; /** * Helper struct to group together all necessary state to write / process a diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index 4b52b91c6..2a2a8b621 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -91,7 +91,7 @@ class ROOTLegacyReader { private: std::pair getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index 03a5d5557..757791cf0 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -85,7 +85,7 @@ class ROOTReader : public IReader { std::map* readRunMetaData() override; private: - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector>& collInfo); std::pair getLocalTreeAndEntry(const std::string& treename); // Information about the data vector as wall as the collection class type diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index 5834a9b5b..e7c917d27 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -104,8 +104,8 @@ class SIOCollectionIDTableBlock : public sio::block { SIOCollectionIDTableBlock(podio::EventStore* store); - SIOCollectionIDTableBlock(std::vector&& names, std::vector&& ids, std::vector&& types, - std::vector&& isSubsetColl) : + SIOCollectionIDTableBlock(std::vector&& names, std::vector&& ids, + std::vector&& types, std::vector&& isSubsetColl) : sio::block("CollectionIDs", sio::version::encode_version(0, 3)), _names(std::move(names)), _ids(std::move(ids)), @@ -131,7 +131,7 @@ class SIOCollectionIDTableBlock : public sio::block { private: std::vector _names{}; - std::vector _ids{}; + std::vector _ids{}; std::vector _types{}; std::vector 
_isSubsetColl{}; }; diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index 4fe575996..dcf726806 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -75,7 +75,7 @@ class UserDataCollection : public CollectionBase { // simpler move-semantics this will be set and properly initialized on // demand during the call to getBuffers std::vector* _vecPtr{nullptr}; - int m_collectionID{0}; + uint32_t m_collectionID{0}; CollRefCollection m_refCollections{}; VectorMembersInfo m_vecmem_info{}; @@ -107,12 +107,12 @@ class UserDataCollection : public CollectionBase { } /// set collection ID - void setID(unsigned id) override { + void setID(uint32_t id) override { m_collectionID = id; } /// get collection ID - unsigned getID() const override { + uint32_t getID() const override { return m_collectionID; } diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index b70e93b12..cbc3945ba 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -59,7 +59,7 @@ public: {{ class.bare_type }}Collection({{ class.bare_type }}Collection&&) = default; {{ class.bare_type }}Collection& operator=({{ class.bare_type }}Collection&&) = default; -// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, int collectionID); +// {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, uint32_t collectionID); ~{{ class.bare_type }}Collection(); void clear() final; @@ -116,17 +116,17 @@ public: /// Get the collection buffers for this collection podio::CollectionWriteBuffers getBuffers() final; - void setID(unsigned ID) final { + void setID(uint32_t ID) final { m_collectionID = ID; if (!m_isSubsetColl) { std::for_each(m_storage.entries.begin(), m_storage.entries.end(), - [ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast(ID)}; } + [ID] ({{ class.bare_type }}Obj* obj) { obj->id = {obj->id.index, static_cast(ID)}; } 
); } m_isValid = true; }; - unsigned getID() const final { + uint32_t getID() const final { return m_collectionID; } @@ -163,7 +163,7 @@ private: bool m_isValid{false}; mutable bool m_isPrepared{false}; bool m_isSubsetColl{false}; - int m_collectionID{0}; + uint32_t m_collectionID{0}; mutable std::unique_ptr m_storageMtx{nullptr}; mutable {{ class.bare_type }}CollectionData m_storage{}; }; diff --git a/python/templates/CollectionData.cc.jinja2 b/python/templates/CollectionData.cc.jinja2 index ddc6f29dd..3ae5d3a80 100644 --- a/python/templates/CollectionData.cc.jinja2 +++ b/python/templates/CollectionData.cc.jinja2 @@ -139,7 +139,7 @@ const auto {{ member.name }}_size = std::accumulate(entries.begin(), entries.end {% endfor %} } -void {{ class_type }}::prepareAfterRead(int collectionID) { +void {{ class_type }}::prepareAfterRead(uint32_t collectionID) { int index = 0; for (auto& data : *m_data) { auto obj = new {{ class.bare_type }}Obj({index, collectionID}, data); diff --git a/python/templates/CollectionData.h.jinja2 b/python/templates/CollectionData.h.jinja2 index 50ae8dd02..0eee7dabe 100644 --- a/python/templates/CollectionData.h.jinja2 +++ b/python/templates/CollectionData.h.jinja2 @@ -65,7 +65,7 @@ public: void prepareForWrite(bool isSubsetColl); - void prepareAfterRead(int collectionID); + void prepareAfterRead(uint32_t collectionID); void makeSubsetCollection(); diff --git a/python/templates/Obj.cc.jinja2 b/python/templates/Obj.cc.jinja2 index 39829310c..58574057d 100644 --- a/python/templates/Obj.cc.jinja2 +++ b/python/templates/Obj.cc.jinja2 @@ -16,7 +16,7 @@ {{ utils.namespace_open(class.namespace) }} {% with obj_type = class.bare_type + 'Obj' %} {{ obj_type }}::{{ obj_type }}() : -{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %}, +{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, data(){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, 
m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>()) @@ -29,7 +29,7 @@ { } {{ obj_type }}::{{ obj_type }}(const {{ obj_type }}& other) : -{% raw %} ObjBase{{podio::ObjectID::untracked, podio::ObjectID::untracked}, 0}{% endraw %}, +{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, data(other.data){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>(*(other.m_{{ relation.name }}))) diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index f91dfbedf..d07abad19 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -48,7 +48,7 @@ std::vector<{{ member.full_type }}> {{ class.bare_type }}Collection::{{ member.n if (obj->m_{{ relation.name }}) { m_refCollections[{{ real_index }}]->emplace_back(obj->m_{{ relation.name }}->getObjectID()); } else { - m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, podio::ObjectID::invalid}); + m_refCollections[{{ real_index }}]->push_back({podio::ObjectID::invalid, 0}); } } {% endmacro %} diff --git a/python/templates/macros/implementations.jinja2 b/python/templates/macros/implementations.jinja2 index 821056fba..0ddb29e39 100644 --- a/python/templates/macros/implementations.jinja2 +++ b/python/templates/macros/implementations.jinja2 @@ -164,7 +164,7 @@ const podio::ObjectID {{ full_type }}::getObjectID() const { if (m_obj) { return m_obj->id; } - return podio::ObjectID{podio::ObjectID::invalid, podio::ObjectID::invalid}; + return podio::ObjectID{podio::ObjectID::invalid, 0}; } {% set inverse_type = class.bare_type if prefix else 'Mutable' + class.bare_type %} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ae5f0f984..7f71864e3 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ SET(core_sources DatamodelRegistryIOHelpers.cc 
UserDataCollection.cc CollectionBufferFactory.cc + MurmurHash3.cpp ) SET(core_headers @@ -164,3 +165,10 @@ if (ENABLE_SIO) DESTINATION "${CMAKE_INSTALL_LIBDIR}" ) endif() + +add_executable(podio_test_hashes test_hashes.cpp) +target_link_libraries(podio_test_hashes PRIVATE podio::podio) +install(TARGETS podio_test_hashes + EXPORT podioTargets + DESTINATION "${CMAKE_INSTALL_BINDIR}" +) diff --git a/src/CollectionIDTable.cc b/src/CollectionIDTable.cc index e5ca3d15d..fa97a4bbb 100644 --- a/src/CollectionIDTable.cc +++ b/src/CollectionIDTable.cc @@ -3,27 +3,29 @@ #include #include +#include "MurmurHash3.h" + namespace podio { CollectionIDTable::CollectionIDTable() : m_mutex(std::make_unique()) { } -CollectionIDTable::CollectionIDTable(std::vector&& ids, std::vector&& names) : +CollectionIDTable::CollectionIDTable(std::vector&& ids, std::vector&& names) : m_collectionIDs(std::move(ids)), m_names(std::move(names)), m_mutex(std::make_unique()) { } -CollectionIDTable::CollectionIDTable(const std::vector& ids, const std::vector& names) : +CollectionIDTable::CollectionIDTable(const std::vector& ids, const std::vector& names) : m_collectionIDs(ids), m_names(names), m_mutex(std::make_unique()) { } -const std::string CollectionIDTable::name(int ID) const { +const std::string CollectionIDTable::name(uint32_t ID) const { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_collectionIDs), end(m_collectionIDs), ID); const auto index = std::distance(m_collectionIDs.begin(), result); return m_names[index]; } -int CollectionIDTable::collectionID(const std::string& name) const { +uint32_t CollectionIDTable::collectionID(const std::string& name) const { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); const auto index = std::distance(m_names.begin(), result); @@ -44,13 +46,13 @@ bool CollectionIDTable::present(const std::string& name) const { return result != end(m_names); } -int CollectionIDTable::add(const 
std::string& name) { +uint32_t CollectionIDTable::add(const std::string& name) { std::lock_guard lock(*m_mutex); const auto result = std::find(begin(m_names), end(m_names), name); - int ID = 0; + uint32_t ID = 0; if (result == m_names.end()) { m_names.emplace_back(name); - ID = m_names.size(); + MurmurHash3_x86_32(name.c_str(), name.size(), 0, &ID); m_collectionIDs.emplace_back(ID); } else { const auto index = std::distance(m_names.begin(), result); diff --git a/src/EventStore.cc b/src/EventStore.cc index 85dfb3a09..947d83fff 100644 --- a/src/EventStore.cc +++ b/src/EventStore.cc @@ -7,7 +7,6 @@ namespace podio { EventStore::EventStore() : m_table(new CollectionIDTable()) { - m_cachedCollections.resize(128); // allow for a sufficiently large initial number of collections } EventStore::~EventStore() { @@ -16,24 +15,13 @@ EventStore::~EventStore() { } } -bool EventStore::get(int id, CollectionBase*& collection) const { - // see if we have a cached collection - if ((collection = getFast(id)) != nullptr) { - return true; - } - +bool EventStore::get(uint32_t id, CollectionBase*& collection) const { auto val = m_retrievedIDs.insert(id); bool success = false; if (val.second == true) { // collection not yet retrieved in recursive-call auto name = m_table->name(id); success = doGet(name, collection, true); - if (collection != nullptr) { // cache the collection for faster retreaval later - if (m_cachedCollections.size() < (unsigned)id + 1) { - m_cachedCollections.resize(id + 1); - } - m_cachedCollections[id] = collection; - } } else { // collection already requested in recursive call // do not set the references to break collection dependency-cycle @@ -106,7 +94,7 @@ GenericParameters& EventStore::getRunMetaData(int runID) { return m_runMDMap[runID]; } -GenericParameters& EventStore::getCollectionMetaData(int colID) { +GenericParameters& EventStore::getCollectionMetaData(uint32_t colID) { if (m_colMDMap.empty() && m_reader != nullptr) { ColMDMap* tmp = 
m_reader->readCollectionMetaData(); @@ -135,8 +123,6 @@ void EventStore::clear() { void EventStore::clearCaches() { m_collections.clear(); - m_cachedCollections.clear(); - m_cachedCollections.resize(128); m_retrievedIDs.clear(); } diff --git a/src/MurmurHash3.cpp b/src/MurmurHash3.cpp new file mode 100644 index 000000000..a782eeeaa --- /dev/null +++ b/src/MurmurHash3.cpp @@ -0,0 +1,442 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + + #define FORCE_INLINE __forceinline + + #include + + #define ROTL32(x, y) _rotl(x, y) + #define ROTL64(x, y) _rotl64(x, y) + + #define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + + #define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32(uint32_t x, int8_t r) { + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64(uint64_t x, int8_t r) { + return (x << r) | (x >> (64 - r)); +} + + #define ROTL32(x, y) rotl32(x, y) + #define ROTL64(x, y) rotl64(x, y) + + #define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { + return p[i]; +} + 
+FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +FORCE_INLINE uint64_t fmix64(uint64_t k) { + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128(const void* key, const int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + 
const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 16); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i * 4 + 0); + uint32_t k2 = getblock32(blocks, i * 4 + 1); + uint32_t k3 = getblock32(blocks, i * 4 + 2); + uint32_t k4 = getblock32(blocks, i * 4 + 3); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL32(h1, 19); + h1 += h2; + h1 = h1 * 5 + 0x561ccd1b; + + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + + h2 = ROTL32(h2, 17); + h2 += h3; + h2 = h2 * 5 + 0x0bcaa747; + + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + + h3 = ROTL32(h3, 15); + h3 += h4; + h3 = h3 * 5 + 0x96cd1c35; + + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + + h4 = ROTL32(h4, 13); + h4 += h1; + h4 = h4 * 5 + 0x32ac3b17; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: + k4 ^= tail[14] << 16; + [[fallthrough]]; + case 14: + k4 ^= tail[13] << 8; + [[fallthrough]]; + case 13: + k4 ^= tail[12] << 0; + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + [[fallthrough]]; + + case 12: + k3 ^= tail[11] << 24; + [[fallthrough]]; + case 11: + k3 ^= tail[10] << 16; + [[fallthrough]]; + case 10: + k3 ^= tail[9] << 8; + [[fallthrough]]; + case 9: + k3 ^= tail[8] << 0; + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + [[fallthrough]]; + + case 8: + k2 ^= tail[7] << 24; + [[fallthrough]]; + case 7: + k2 ^= tail[6] << 16; + [[fallthrough]]; + case 6: + k2 ^= tail[5] << 8; + [[fallthrough]]; + case 5: + k2 ^= tail[4] << 0; + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + [[fallthrough]]; + + case 4: + k1 ^= tail[3] << 24; + [[fallthrough]]; + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= 
tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0] << 0; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + h3 ^= len; + h4 ^= len; + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128(const void* key, const int len, const uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const auto blocks = (const uint64_t*)(data); + + for (int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks, i * 2 + 0); + uint64_t k2 = getblock64(blocks, i * 2 + 1); + + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + + h2 = ROTL64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: + k2 ^= ((uint64_t)tail[14]) << 48; + [[fallthrough]]; + case 14: + k2 ^= ((uint64_t)tail[13]) << 40; + [[fallthrough]]; + case 13: + k2 ^= ((uint64_t)tail[12]) << 32; + [[fallthrough]]; + case 12: + k2 ^= ((uint64_t)tail[11]) << 24; + [[fallthrough]]; + case 11: + k2 ^= ((uint64_t)tail[10]) << 16; + [[fallthrough]]; + case 10: + k2 ^= ((uint64_t)tail[9]) << 8; + [[fallthrough]]; + case 9: + k2 
^= ((uint64_t)tail[8]) << 0; + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + [[fallthrough]]; + + case 8: + k1 ^= ((uint64_t)tail[7]) << 56; + [[fallthrough]]; + case 7: + k1 ^= ((uint64_t)tail[6]) << 48; + [[fallthrough]]; + case 6: + k1 ^= ((uint64_t)tail[5]) << 40; + [[fallthrough]]; + case 5: + k1 ^= ((uint64_t)tail[4]) << 32; + [[fallthrough]]; + case 4: + k1 ^= ((uint64_t)tail[3]) << 24; + [[fallthrough]]; + case 3: + k1 ^= ((uint64_t)tail[2]) << 16; + [[fallthrough]]; + case 2: + k1 ^= ((uint64_t)tail[1]) << 8; + [[fallthrough]]; + case 1: + k1 ^= ((uint64_t)tail[0]) << 0; + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- diff --git a/src/MurmurHash3.h b/src/MurmurHash3.h new file mode 100644 index 000000000..e73990396 --- /dev/null +++ b/src/MurmurHash3.h @@ -0,0 +1,37 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. 
+ +#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards +#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + + // Other compilers + +#else // defined(_MSC_VER) + + #include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x86_128(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x64_128(const void* key, int len, uint32_t seed, void* out); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/src/rootUtils.h b/src/rootUtils.h index 7c6311c83..507d24b15 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -135,7 +135,7 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB // A collection of additional information that describes the collection: the // collectionID, the collection (data) type, whether it is a subset // collection, and its schema version -using CollectionInfoT = std::tuple; +using CollectionInfoT = std::tuple; // for backwards compatibility using CollectionInfoWithoutSchemaT = std::tuple; diff --git a/src/selection.xml b/src/selection.xml index b1a9694c1..4daca9192 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -15,6 +15,9 @@ + + + diff --git a/src/sioUtils.h b/src/sioUtils.h index 204867eaf..82297456b 100644 --- a/src/sioUtils.h +++ b/src/sioUtils.h @@ -46,7 +46,7 @@ namespace sio_utils { subsetColl.reserve(collections.size()); std::vector names; names.reserve(collections.size()); - std::vector 
ids; + std::vector ids; ids.reserve(collections.size()); for (const auto& [name, coll] : collections) { diff --git a/src/test_hashes.cpp b/src/test_hashes.cpp new file mode 100644 index 000000000..41168ae58 --- /dev/null +++ b/src/test_hashes.cpp @@ -0,0 +1,126 @@ +#include "MurmurHash3.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +auto readCollNames(const std::string& fileName) { + std::vector collNames{}; + + std::ifstream inputFile(fileName); + if (!inputFile.is_open()) { + std::cerr << "Failed to open file \'" << fileName << "\' for reading collection names" << std::endl; + return collNames; + } + + std::string name; + while (inputFile >> name) { + collNames.emplace_back(std::move(name)); + } + + return collNames; +} + +/// Hash all passed strings using the passed in HashFunc with an interface like +/// the MurmurHash3 methods +template +auto hashStrings(const std::vector& strings, HashFunc hashFunc) { + std::vector hashes; + hashes.reserve(strings.size()); + + for (const auto& s : strings) { + HashT id = 0; + hashFunc(s.c_str(), s.size(), 0, &id); + hashes.emplace_back(id); + } + + return hashes; +} + +/// Hash all the passed in strings and check for collisions. Returns a vector of +/// Hashes and the corresponding colliding strings. 
Empty vector corresponds to +/// no collisions +template +auto getCollisions(const std::vector& strings, HashFunc hashFunc) { + auto hashes = hashStrings(strings, hashFunc); + + // Use a multimap for collision detection + std::multimap hashMap{}; + for (size_t i = 0; i < hashes.size(); ++i) { + hashMap.emplace(hashes[i], strings[i]); + } + + std::vector>> collidingStrings; + auto firstIt = hashMap.begin(); + while (firstIt != hashMap.end()) { + auto rangeIts = hashMap.equal_range(firstIt->first); + if (std::distance(rangeIts.first, rangeIts.second) != 1) { + std::vector names; + names.reserve(2); // Most likely case hopefully + for (auto it = rangeIts.first; it != rangeIts.second; ++it) { + names.emplace_back(it->second); + } + + collidingStrings.emplace_back(rangeIts.first->first, std::move(names)); + } + + firstIt = rangeIts.second; + } + + return collidingStrings; +} + +template +std::ostream& operator<<(std::ostream& os, const std::vector& vec) { + os << '['; + if (!vec.empty()) { + os << vec[0]; + } + for (size_t i = 1; i < vec.size(); ++i) { + os << ", " << vec[i]; + } + return os << ']'; +} + +constexpr static auto usage = R"USAGE(usage: podio_test_hashes [-h] collNameFile)USAGE"; +constexpr static auto help = R"HELP( +Check if any of the collection names provided lead to a collision in the collection IDs + +positional arguments: + collNameFile a text file containing all collection names to be checked + +optional arguments: + -h, --help show this help message and exit +)HELP"; + +int main(int argc, char* argv[]) { + if (argc == 1) { + std::cerr << usage << std::endl; + return 1; + } + if (argc == 2 && (argv[1] == std::string("-h") || argv[1] == std::string("--help"))) { + std::cerr << usage << '\n' << help << std::endl; + return 0; + } + + const auto collNames = readCollNames(argv[1]); + const auto collisions = getCollisions(collNames, MurmurHash3_x86_32); + + if (!collisions.empty()) { + std::cerr << "Found collisions between names" << std::endl; + 
std::cout << "hash: " << '\n'; + for (const auto& [hash, colls] : collisions) { + std::cout << std::hex << std::setw(8) << std::setfill('0') << hash << ": " << colls << '\n'; + } + + return 1; + } + + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 344d910db..9683e6d31 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -237,6 +237,8 @@ if (NOT FORCE_RUN_ALL_TESTS) set(filter_tests "~[LEAK-FAIL]") elseif(USE_SANITIZER MATCHES "Thread") set(filter_tests "~[THREAD-FAIL]") + elseif(USE_SANITIZER MATCHES "Undefined") + set(filter_tests "~[UBSAN-FAIL]") endif() endif() diff --git a/tests/frame.cpp b/tests/frame.cpp index 4d36909f1..e52965a07 100644 --- a/tests/frame.cpp +++ b/tests/frame.cpp @@ -365,3 +365,15 @@ TEST_CASE("Frame parameters multithread insert and read", "[frame][basics][multi REQUIRE(frame.getParameter(makeName("string", i)) == std::to_string(i)); } } + +TEST_CASE("Frame double insert", "[frame][basics]") { + auto event = podio::Frame(); + auto clusters = ExampleClusterCollection(); + clusters.create(3.14f); + clusters.create(42.0f); + auto other_clusters = ExampleClusterCollection(); + other_clusters.create(23.0f); + + event.put(std::move(clusters), "clusters"); + REQUIRE_THROWS_AS(event.put(std::move(other_clusters), "clusters"), std::invalid_argument); +} diff --git a/tests/unittest.cpp b/tests/unittest.cpp index 77e14727f..6903a2e2b 100644 --- a/tests/unittest.cpp +++ b/tests/unittest.cpp @@ -79,7 +79,7 @@ TEST_CASE("Assignment-operator ref count", "[basics][memory-management]") { } } -TEST_CASE("Clearing", "[ASAN-FAIL][THREAD-FAIL][basics][memory-management]") { +TEST_CASE("Clearing", "[UBSAN-FAIL][ASAN-FAIL][THREAD-FAIL][basics][memory-management]") { bool success = true; auto store = podio::EventStore(); auto& hits = store.create("hits"); From fda9213319409a74896c33b7e008219e1d1c31ab Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 8 Jun 2023 16:00:17 +0200 Subject: [PATCH 076/100] Introduce 
PODIO_SIOBLOCK_PATH for more robustness (#426) --- src/SIOBlock.cc | 12 ++++++++++-- tests/CMakeLists.txt | 10 +++++++++- tools/CMakeLists.txt | 2 ++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 981f7df47..52d5565f8 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -159,11 +159,19 @@ std::vector> SIOBlockLibraryLoader::getLibN #endif std::vector> libs; - std::string dir; - const auto ldLibPath = std::getenv("LD_LIBRARY_PATH"); + const auto ldLibPath = []() { + // Check PODIO_SIOBLOCK_PATH first and fall back to LD_LIBRARY_PATH + auto pathVar = std::getenv("PODIO_SIOBLOCK_PATH"); + if (!pathVar) { + pathVar = std::getenv("LD_LIBRARY_PATH"); + } + return pathVar; + }(); if (!ldLibPath) { return libs; } + + std::string dir; std::istringstream stream(ldLibPath); while (std::getline(stream, dir, ':')) { if (not fs::exists(dir)) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9683e6d31..90f2649b5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -50,6 +50,8 @@ function(CREATE_PODIO_TEST sourcefile additional_libs) # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts # with existing headers from other installations ROOT_INCLUDE_PATH= + # Only pick up this build for testing + PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} ) endfunction() @@ -211,6 +213,8 @@ set_property(TEST pyunittest PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} SKIP_SIO_TESTS=$> + # Only pick up this build for testing + PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} ) set_property(TEST pyunittest PROPERTY DEPENDS write write_frame_root) if (TARGET write_sio) @@ -256,6 +260,8 @@ if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" 
OR SKIP_CATCH_DISCOVERY) set_property(TEST unittest PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + # Only pick up this build for testing + PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} ) endif() else() @@ -267,6 +273,8 @@ else() PROPERTIES ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + # Only pick up this build for testing + PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} ) endif() @@ -333,5 +341,5 @@ set_tests_properties( PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ENVIRONMENT - "PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};ENABLE_SIO=${ENABLE_SIO};PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH}" + "PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};ENABLE_SIO=${ENABLE_SIO};PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH};PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR}" ) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index c0765c28a..f86e38210 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -21,6 +21,8 @@ if(BUILD_TESTING) LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:${SIO_LD_PATH}:$ENV{LD_LIBRARY_PATH} PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} + # Only pick up this build for testing + PODIO_SIOBLOCK_PATH=${CMAKE_BINARY_DIR}/tests ) 
set_tests_properties(${name} PROPERTIES From 930e6001e893bef9415639710e12b67b5deba5d6 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 9 Jun 2023 11:44:27 +0200 Subject: [PATCH 077/100] Add static constexpr char* names and hand out views for them (#402) * Add static constexpr char* names and hand out views for them * Work around Catch2 problem in test discovery * Add AUTO as possible option to USE_EXTERNAL_CATCH2 * Switch key4hep workflows to build Catch2 if necessary --- .github/workflows/key4hep.yml | 2 +- CMakeLists.txt | 3 ++- include/podio/CollectionBase.h | 8 ++++---- include/podio/SIOBlock.h | 2 +- include/podio/UserDataCollection.h | 16 ++++++++++------ python/podio/test_Frame.py | 2 +- python/templates/Collection.h.jinja2 | 10 +++++++--- src/ROOTFrameWriter.cc | 2 +- src/ROOTReader.cc | 2 +- src/ROOTWriter.cc | 2 +- src/SIOBlock.cc | 4 ++-- src/SIOWriter.cc | 2 +- tests/CMakeLists.txt | 27 +++++++++++++++++++-------- tools/podio-dump | 2 +- 14 files changed, 52 insertions(+), 32 deletions(-) diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index f831301fe..92240b93e 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -30,7 +30,7 @@ jobs: -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ - -DUSE_EXTERNAL_CATCH2=ON \ + -DUSE_EXTERNAL_CATCH2=AUTO \ -G Ninja .. 
echo "::endgroup::" echo "::group::Build" diff --git a/CMakeLists.txt b/CMakeLists.txt index 577fac281..e73859707 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,7 +142,8 @@ endif() #--- enable unit testing capabilities ------------------------------------------ include(CTest) -option(USE_EXTERNAL_CATCH2 "Link against an external Catch2 v3 static library, otherwise build it locally" ON) +set(USE_EXTERNAL_CATCH2 AUTO CACHE STRING "Link against an external Catch2 v3 static library, otherwise build it locally") +set_property(CACHE USE_EXTERNAL_CATCH2 PROPERTY STRINGS AUTO ON OFF) #--- enable CPack -------------------------------------------------------------- diff --git a/include/podio/CollectionBase.h b/include/podio/CollectionBase.h index d502c124b..6e5b8ded4 100644 --- a/include/podio/CollectionBase.h +++ b/include/podio/CollectionBase.h @@ -6,7 +6,7 @@ #include "podio/SchemaEvolution.h" #include -#include +#include #include #include @@ -56,11 +56,11 @@ class CollectionBase { virtual size_t size() const = 0; /// fully qualified type name - virtual std::string getTypeName() const = 0; + virtual const std::string_view getTypeName() const = 0; /// fully qualified type name of elements - with namespace - virtual std::string getValueTypeName() const = 0; + virtual const std::string_view getValueTypeName() const = 0; /// fully qualified type name of stored POD elements - with namespace - virtual std::string getDataTypeName() const = 0; + virtual const std::string_view getDataTypeName() const = 0; /// schema version of the collection virtual SchemaVersionT getSchemaVersion() const = 0; diff --git a/include/podio/SIOBlock.h b/include/podio/SIOBlock.h index e7c917d27..aa5e58c71 100644 --- a/include/podio/SIOBlock.h +++ b/include/podio/SIOBlock.h @@ -220,7 +220,7 @@ class SIOBlockFactory { private: SIOBlockFactory() = default; - typedef std::map BlockMap; + typedef std::unordered_map BlockMap; BlockMap _map{}; public: diff --git a/include/podio/UserDataCollection.h 
b/include/podio/UserDataCollection.h index dcf726806..b9aefaf40 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -93,6 +93,10 @@ class UserDataCollection : public CollectionBase { /// The schema version of UserDataCollections static constexpr SchemaVersionT schemaVersion = 1; + constexpr static auto typeName = userDataCollTypeName(); + constexpr static auto valueTypeName = userDataTypeName(); + constexpr static auto dataTypeName = userDataTypeName(); + /// prepare buffers for serialization void prepareForWrite() const override { } @@ -133,18 +137,18 @@ class UserDataCollection : public CollectionBase { } /// fully qualified type name - std::string getTypeName() const override { - return userDataCollTypeName(); + const std::string_view getTypeName() const override { + return typeName; } /// fully qualified type name of elements - with namespace - std::string getValueTypeName() const override { - return userDataTypeName(); + const std::string_view getValueTypeName() const override { + return valueTypeName; } /// fully qualified type name of stored POD elements - with namespace - std::string getDataTypeName() const override { - return userDataTypeName(); + const std::string_view getDataTypeName() const override { + return dataTypeName; } /// clear the collection and all internal states diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index 8f4337c4f..dfdc8d426 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -58,7 +58,7 @@ def test_frame_collections(self): # Not going over all collections here, as that should all be covered by the # c++ test cases; Simply picking a few and doing some basic tests mc_particles = self.event.get('mcparticles') - self.assertEqual(mc_particles.getValueTypeName(), 'ExampleMC') + self.assertEqual(mc_particles.getValueTypeName().data(), 'ExampleMC') self.assertEqual(len(mc_particles), 10) self.assertEqual(len(mc_particles[0].daughters()), 4) diff --git 
a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index cbc3945ba..4ef0df066 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -62,6 +62,10 @@ public: // {{ class.bare_type }}Collection({{ class.bare_type }}Vector* data, uint32_t collectionID); ~{{ class.bare_type }}Collection(); + constexpr static auto typeName = "{{ (class | string ).strip(':') + "Collection" }}"; + constexpr static auto valueTypeName = "{{ (class | string ).strip(':') }}"; + constexpr static auto dataTypeName = "{{ (class | string ).strip(':') + "Data" }}"; + void clear() final; /// Print this collection to the passed stream @@ -82,11 +86,11 @@ public: std::size_t size() const final; /// fully qualified type name - std::string getTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Collection" }}"); } + const std::string_view getTypeName() const final { return typeName; } /// fully qualified type name of elements - with namespace - std::string getValueTypeName() const final { return std::string("{{ (class | string ).strip(':') }}"); } + const std::string_view getValueTypeName() const final { return valueTypeName; } /// fully qualified type name of stored POD elements - with namespace - std::string getDataTypeName() const final { return std::string("{{ (class | string ).strip(':')+"Data" }}"); } + const std::string_view getDataTypeName() const final { return dataTypeName; } /// schema version podio::SchemaVersionT getSchemaVersion() const final; diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index 5c1cc195d..e24523da7 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -76,7 +76,7 @@ void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vectorBranch(brName.c_str(), refColl.get())); } else { // For "proper" collections we populate all branches, starting with the data - auto bufferDataType = "vector<" + coll->getDataTypeName() + ">"; + const auto 
bufferDataType = "vector<" + std::string(coll->getDataTypeName()) + ">"; branches.data = catInfo.tree->Branch(name.c_str(), bufferDataType.c_str(), buffers.data); const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(coll->getValueTypeName()); diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 57bf1500f..768e099f3 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -284,7 +284,7 @@ void ROOTReader::createCollectionBranches(const std::vectorgetDataTypeName() + ">"; + const auto bufferClassName = "std::vector<" + std::string(collection->getDataTypeName()) + ">"; const auto bufferClass = isSubsetColl ? nullptr : TClass::GetClass(bufferClassName.c_str()); m_storedClasses.emplace(name, std::make_tuple(bufferClass, collectionClass, collectionIndex++)); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index cf0a768e6..6159c52f0 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -56,7 +56,7 @@ void ROOTWriter::createBranches(const std::vector& collections) if (collBuffers.data) { // only create the data buffer branch if necessary - auto collClassName = "vector<" + coll->getDataTypeName() + ">"; + const auto collClassName = "vector<" + std::string(coll->getDataTypeName()) + ">"; branches.data = m_datatree->Branch(name.c_str(), collClassName.c_str(), collBuffers.data); } diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index 52d5565f8..d59dc5fae 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -27,7 +27,7 @@ SIOCollectionIDTableBlock::SIOCollectionIDTableBlock(podio::EventStore* store) : << id << ", name: " << table->name(id) << ")" << std::endl; } - _types.push_back(tmp->getValueTypeName()); + _types.emplace_back(tmp->getValueTypeName()); _isSubsetColl.push_back(tmp->isSubsetCollection()); } } @@ -109,7 +109,7 @@ std::shared_ptr SIOBlockFactory::createBlock(const std::string& typeSt std::shared_ptr SIOBlockFactory::createBlock(const podio::CollectionBase* col, const std::string& name) const { - const std::string typeStr = 
col->getValueTypeName(); + const auto typeStr = std::string(col->getValueTypeName()); // Need c++20 for transparent lookup const auto it = _map.find(typeStr); if (it != _map.end()) { diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index cced3e28c..7cff1fa98 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -98,7 +98,7 @@ void SIOWriter::registerForWrite(const std::string& name) { } // Check if we can instantiate the blocks here so that we can skip the checks later if (auto blk = podio::SIOBlockFactory::instance().createBlock(colB, name); !blk) { - const auto typName = colB->getValueTypeName(); + const auto typName = std::string(colB->getValueTypeName()); throw std::runtime_error(std::string("could not create SIOBlock for type: ") + typName); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 90f2649b5..1b32b4101 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -128,8 +128,14 @@ CREATE_PODIO_TEST(ostream_operator.cpp "") CREATE_PODIO_TEST(write_ascii.cpp "") if(USE_EXTERNAL_CATCH2) - find_package(Catch2 3 REQUIRED) -else() + if (USE_EXTERNAL_CATCH2 STREQUAL AUTO) + find_package(Catch2 3.1) + else() + find_package(Catch2 3.1 REQUIRED) + endif() +endif() + +if(NOT Catch2_FOUND) message(STATUS "Fetching local copy of Catch2 library for unit-tests...") # Build Catch2 with the default flags, to avoid generating warnings when we # build it @@ -139,7 +145,7 @@ else() FetchContent_Declare( Catch2 GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.0.1 + GIT_TAG v3.1.0 ) FetchContent_MakeAvailable(Catch2) set(CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras ${CMAKE_MODULE_PATH}) @@ -248,6 +254,14 @@ endif() option(SKIP_CATCH_DISCOVERY "Skip the Catch2 test discovery" OFF) +# To work around https://github.com/catchorg/Catch2/issues/2424 we need the +# DL_PATH argument for catch_discoer_tests which requires CMake 3.22 at least +# The whole issue can be avoied if we skip the catch test discovery and set the +# environment on our 
own +if (CMAKE_VERSION VERSION_LESS 3.22) + set(SKIP_CATCH_DISCOVERY ON) +endif() + if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" OR SKIP_CATCH_DISCOVERY) # Automatic test discovery fails with Memory sanitizers due to some issues in # Catch2. So in that case we skip the discovery step and simply run the thing @@ -259,9 +273,7 @@ if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" OR SKIP_CATCH_DISCOVERY) add_test(NAME unittest COMMAND unittest ${filter_tests}) set_property(TEST unittest PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} - # Only pick up this build for testing - PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} + "LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH};PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR}" ) endif() else() @@ -270,10 +282,9 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT TEST_SPEC ${filter_tests} # discover only tests that are known to not fail + DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PROPERTIES ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} - # Only pick up this build for testing PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} ) endif() diff --git a/tools/podio-dump b/tools/podio-dump index 8685aa19e..1cf044d80 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -53,7 +53,7 @@ def print_frame(frame, cat_name, ientry, detailed): coll.print() print(flush=True) else: - print(f'{name:<38} {coll.getID():<4} {coll.getValueTypeName():<32} {len(coll):<10}') + print(f'{name:<38} {coll.getID():<4} {coll.getValueTypeName().data():<32} {len(coll):<10}') # And then parameters print('\nParameters:', flush=True) From adbd1ea6139a35bbccef7993f783946fb793a425 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 15 Jun 2023 09:53:27 
+0200 Subject: [PATCH 078/100] Improve podio dump (#427) * Delay library loading as long as possible * Add a --version flag for dumping the podio version * Dump collections in alphabetical order * Use tabulate for easier printing of overview table * Update README with new requirements * [format] Fix pylint and flake8 issues --- README.md | 4 ++- requirements.txt | 1 + tools/podio-dump | 76 +++++++++++++++++++++++++++++++----------------- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 97f7a51a0..58c003219 100755 --- a/README.md +++ b/README.md @@ -66,7 +66,9 @@ In order for the `yaml` module to be working it might also be necessary to insta Check that you can now import the `yaml` and `jinja2` modules in python. -Optionally, `graphviz` is also required for the visualization tool `podio-vis`. +Some tools have additional dependencies that are not required for code generation or library use +- `graphviz` is required for `podio-vis` +- `tabulate` is required for `podio-dump` ## Preparing the environment diff --git a/requirements.txt b/requirements.txt index 92b056252..ce095ebd2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ # From pypi pyyaml jinja2 +tabulate diff --git a/tools/podio-dump b/tools/podio-dump index 1cf044d80..d980e0a5f 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -5,7 +5,9 @@ import sys import json import yaml -from podio.reading import get_reader +from tabulate import tabulate + +from podio import __version__ def print_general_info(reader, filename): @@ -29,8 +31,49 @@ def print_general_info(reader, filename): print() +def print_frame_detailed(frame): + """Print the Frame in all its glory, dumping every collection via print + + Args: + frame (podio.Frame): The frame to print + """ + print('Collections:') + for name in sorted(frame.collections, key=str.casefold): + coll = frame.get(name) + print(name, flush=True) + coll.print() + print(flush=True) + + print('\nParameters:',
flush=True) + frame.get_parameters().print() + print(flush=True) + + +def print_frame_overview(frame): + """Print a Frame overview, dumping just collection names, types and sizes + + Args: + frame (podio.Frame): The frame to print + """ + rows = [] + for name in sorted(frame.collections, key=str.casefold): + coll = frame.get(name) + rows.append( + (name, coll.getValueTypeName().data(), len(coll), f'{coll.getID():0>8x}') + ) + print('Collections:') + print(tabulate(rows, headers=["Name", "ValueType", "Size", "ID"])) + + rows = [] + for name in sorted(frame.parameters, key=str.casefold): + for par_type, n_pars in frame.get_param_info(name).items(): + rows.append([name, par_type, n_pars]) + print('\nParameters:') + print(tabulate(rows, headers=["Name", "Type", "Elements"])) + + def print_frame(frame, cat_name, ientry, detailed): - """Print a Frame overview. + """Print a Frame. Args: frame (podio.Frame): The frame to print @@ -39,34 +82,11 @@ def print_frame(frame, cat_name, ientry, detailed): detailed (bool): Print just an overview or dump the whole contents """ print('{:#^82}'.format(f' {cat_name} {ientry} ')) # pylint: disable=consider-using-f-string - print('Collections:') - - if not detailed: - print(f'{"Name":<38} {"ID":<4} {"Type":<32} {"Size":<10}') - print('-' * 82) - # Print collections - for name in frame.collections: - coll = frame.get(name) - if detailed: - print(name, flush=True) - coll.print() - print(flush=True) - else: - print(f'{name:<38} {coll.getID():<4} {coll.getValueTypeName().data():<32} {len(coll):<10}') - - # And then parameters - print('\nParameters:', flush=True) if detailed: - frame.get_parameters().print() - print(flush=True) + print_frame_detailed(frame) else: - print(f'{"Name":<30} {"Type":<12} {"Elements":<10}') - print('-' * 54) - for name in frame.parameters: - par_infos = frame.get_param_info(name) - for par_type, n_pars in par_infos.items(): - print(f'{name:<30} {par_type:<12} {n_pars:<10}') + print_frame_overview(frame) # Additional 
new line before the next entry print('\n', flush=True) @@ -86,6 +106,7 @@ def dump_model(reader, model_name): def main(args): """Main""" + from podio.reading import get_reader # pylint: disable=import-outside-toplevel try: reader = get_reader(args.inputfile) except ValueError as err: @@ -145,6 +166,7 @@ if __name__ == '__main__': parser.add_argument('--dump-edm', help='Dump the specified EDM definition from the file in yaml format', type=str, default=None) + parser.add_argument('--version', action='version', version=f'podio {__version__}') clargs = parser.parse_args() main(clargs) From 0f5acd6567b5cbe801f6f580d6102868175888d2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 15 Jun 2023 10:37:45 +0200 Subject: [PATCH 079/100] Reorganize the CMake configuration for the tests (#428) * Fix repeated download of test inputs if they are already present * Split tests into subdirectories to declutter things a bit * Move test utility functionality into dedicated file - Introduce PODIO_SET_TEST_ENV function to set consistent test environment * Move dumpmodel roundtrip tests into separate subfolder * Move commonly used declaration to separate header (Fixes clang-tidy not being able to find an included header otherwise) --- CMakeLists.txt | 3 +- cmake/podioTest.cmake | 52 +++ python/CMakeLists.txt | 12 +- python/podio/test_EventStoreRoot.py | 4 +- python/podio/test_EventStoreSio.py | 2 +- python/podio/test_Frame.py | 2 +- python/podio/test_ReaderRoot.py | 4 +- python/podio/test_ReaderSio.py | 4 +- tests/CMakeLists.txt | 320 +----------------- tests/dumpmodel/CMakeLists.txt | 63 ++++ tests/frame_test_common.h | 28 ++ tests/read_frame_auxiliary.h | 2 +- tests/root_io/CMakeLists.txt | 61 ++++ .../{ => root_io}/read-legacy-files-root.cpp | 0 tests/{ => root_io}/read-multiple.cpp | 0 tests/{ => root_io}/read.cpp | 0 tests/{ => root_io}/read_and_write.cpp | 0 .../read_and_write_associated.cpp | 0 .../{ => root_io}/read_frame_legacy_root.cpp | 0 tests/{ =>
root_io}/read_frame_root.cpp | 0 .../read_frame_root_multiple.cpp | 0 tests/{ => root_io}/read_timed.cpp | 0 tests/{ => root_io}/relation_range.cpp | 0 tests/{ => root_io}/write.cpp | 0 tests/{ => root_io}/write_frame_root.cpp | 0 tests/{ => root_io}/write_timed.cpp | 0 tests/sio_io/CMakeLists.txt | 32 ++ tests/{ => sio_io}/read_and_write_sio.cpp | 0 tests/{ => sio_io}/read_frame_legacy_sio.cpp | 0 tests/{ => sio_io}/read_frame_sio.cpp | 0 tests/{ => sio_io}/read_sio.cpp | 0 tests/{ => sio_io}/read_timed_sio.cpp | 0 tests/{ => sio_io}/write_frame_sio.cpp | 0 tests/{ => sio_io}/write_sio.cpp | 0 tests/{ => sio_io}/write_timed_sio.cpp | 0 tests/unittests/CMakeLists.txt | 91 +++++ tests/{ => unittests}/frame.cpp | 0 tests/{ => unittests}/unittest.cpp | 0 tests/write_frame.h | 24 +- tools/CMakeLists.txt | 26 +- 40 files changed, 372 insertions(+), 358 deletions(-) create mode 100644 cmake/podioTest.cmake create mode 100644 tests/dumpmodel/CMakeLists.txt create mode 100644 tests/frame_test_common.h create mode 100644 tests/root_io/CMakeLists.txt rename tests/{ => root_io}/read-legacy-files-root.cpp (100%) rename tests/{ => root_io}/read-multiple.cpp (100%) rename tests/{ => root_io}/read.cpp (100%) rename tests/{ => root_io}/read_and_write.cpp (100%) rename tests/{ => root_io}/read_and_write_associated.cpp (100%) rename tests/{ => root_io}/read_frame_legacy_root.cpp (100%) rename tests/{ => root_io}/read_frame_root.cpp (100%) rename tests/{ => root_io}/read_frame_root_multiple.cpp (100%) rename tests/{ => root_io}/read_timed.cpp (100%) rename tests/{ => root_io}/relation_range.cpp (100%) rename tests/{ => root_io}/write.cpp (100%) rename tests/{ => root_io}/write_frame_root.cpp (100%) rename tests/{ => root_io}/write_timed.cpp (100%) create mode 100644 tests/sio_io/CMakeLists.txt rename tests/{ => sio_io}/read_and_write_sio.cpp (100%) rename tests/{ => sio_io}/read_frame_legacy_sio.cpp (100%) rename tests/{ => sio_io}/read_frame_sio.cpp (100%) rename tests/{ => 
sio_io}/read_sio.cpp (100%) rename tests/{ => sio_io}/read_timed_sio.cpp (100%) rename tests/{ => sio_io}/write_frame_sio.cpp (100%) rename tests/{ => sio_io}/write_sio.cpp (100%) rename tests/{ => sio_io}/write_timed_sio.cpp (100%) create mode 100644 tests/unittests/CMakeLists.txt rename tests/{ => unittests}/frame.cpp (100%) rename tests/{ => unittests}/unittest.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index e73859707..de56ea75a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,12 +169,13 @@ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/NOTICE DESTINATION ${CMAKE_INSTALL_DOCDIR}) #--- project specific subdirectories ------------------------------------------- -add_subdirectory(python) add_subdirectory(src) if(BUILD_TESTING) + include(cmake/podioTest.cmake) add_subdirectory(tests) endif() add_subdirectory(tools) +add_subdirectory(python) #--- add CMake infrastructure -------------------------------------------------- include(cmake/podioCreateConfig.cmake) diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake new file mode 100644 index 000000000..ebc966a83 --- /dev/null +++ b/cmake/podioTest.cmake @@ -0,0 +1,52 @@ +#--- small utility helper function to set a consistent test environment for the passed test + +function(PODIO_SET_TEST_ENV test) + # We need to convert this into a list of arguments that can be used as environment variable + list(JOIN PODIO_IO_HANDLERS " " IO_HANDLERS) + set_property(TEST ${test} + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} + PODIO_SIOBLOCK_PATH=${CMAKE_BINARY_DIR}/tests + ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include + SKIP_SIO_TESTS=$> + IO_HANDLERS=${IO_HANDLERS} + PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT} + PODIO_BASE=${CMAKE_SOURCE_DIR} + ENABLE_SIO=${ENABLE_SIO} + ) +endfunction() + +#--- small utility helper function to allow for a 
more terse definition of tests below +function(CREATE_PODIO_TEST sourcefile additional_libs) + string( REPLACE ".cpp" "" name ${sourcefile} ) + add_executable( ${name} ${sourcefile} ) + add_test(NAME ${name} COMMAND ${name}) + + target_link_libraries(${name} PRIVATE TestDataModel ExtensionDataModel ${additional_libs}) + PODIO_SET_TEST_ENV(${name}) +endfunction() + +#--- utility macro to facilitate the downloading of legacy input data +macro(PODIO_DOWNLOAD_LEGACY_INPUTS) + # Avoid fetching these everytime cmake is run by caching the directory the first + # time the inputs are fetched or if the expected file does not exist in the + # expected directory + if (NOT DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR} OR NOT EXISTS ${PODIO_TEST_INPUT_DATA_DIR}/v00-16-05/example_frame.root) + message(STATUS "Getting test input files") + execute_process( + COMMAND bash ${CMAKE_SOURCE_DIR}/tests/scripts/get_test_inputs.sh + OUTPUT_VARIABLE podio_test_input_data_dir + RESULT_VARIABLE test_inputs_available + ) + if (NOT "${test_inputs_available}" STREQUAL "0") + message(WARNING "Could not get test input files. 
Will skip some tests that depend on these") + # Catch cases where the variable is cached but the file no longer exists + unset(PODIO_TEST_INPUT_DATA_DIR CACHE) + else() + message(STATUS "Test inputs stored in: " ${podio_test_input_data_dir}) + set(PODIO_TEST_INPUT_DATA_DIR ${podio_test_input_data_dir} CACHE INTERNAL "input dir for test inputs fetched from remote sources") + mark_as_advanced(PODIO_TEST_INPUT_DATA_DIR) + endif() + endif() +endmacro() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6f4fd2657..9a7e39bd7 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,6 +1,5 @@ SET(podio_PYTHON_INSTALLDIR python) SET(podio_PYTHON_INSTALLDIR ${podio_PYTHON_INSTALLDIR} PARENT_SCOPE) -SET(podio_PYTHON_DIR ${CMAKE_CURRENT_LIST_DIR} PARENT_SCOPE) set(to_install podio_class_generator.py @@ -27,3 +26,14 @@ endif() #--- install templates --------------------------------------------------------- install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/templates DESTINATION ${podio_PYTHON_INSTALLDIR}) + +IF (BUILD_TESTING) + add_test( NAME pyunittest COMMAND python3 -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) + PODIO_SET_TEST_ENV(pyunittest) + + set_property(TEST pyunittest PROPERTY DEPENDS write write_frame_root) + if (TARGET write_sio) + set_property(TEST pyunittest PROPERTY DEPENDS write_sio write_frame_sio) + endif() + set_property(TEST pyunittest PROPERTY WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/tests) +ENDIF() diff --git a/python/podio/test_EventStoreRoot.py b/python/podio/test_EventStoreRoot.py index e9c334e4e..b9f3830dd 100644 --- a/python/podio/test_EventStoreRoot.py +++ b/python/podio/test_EventStoreRoot.py @@ -14,9 +14,9 @@ class EventStoreRootTestCase(EventStoreBaseTestCaseMixin, unittest.TestCase): """Test cases for root input files""" def setUp(self): """Setup an EventStore reading from a ROOT file""" - self.filename = 'example.root' + self.filename = 'root_io/example.root' self.assertTrue(os.path.isfile(self.filename)) - 
self.store = EventStore(['example.root']) + self.store = EventStore([self.filename]) def test_chain(self): self.store = EventStore([self.filename, diff --git a/python/podio/test_EventStoreSio.py b/python/podio/test_EventStoreSio.py index 1859fecf6..511da07d8 100644 --- a/python/podio/test_EventStoreSio.py +++ b/python/podio/test_EventStoreSio.py @@ -14,7 +14,7 @@ class EventStoreSioTestCase(EventStoreBaseTestCaseMixin, unittest.TestCase): """Test cases for root input files""" def setUp(self): """setup an EventStore reading an SIO file""" - self.filename = 'example.sio' + self.filename = 'sio_io/example.sio' self.assertTrue(os.path.isfile(self.filename)) self.store = EventStore([self.filename]) diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index dfdc8d426..f8ec1ad96 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -46,7 +46,7 @@ def setUp(self): Reading only one event/Frame of each category here as looping and other basic checks are already handled by the Reader tests """ - reader = Reader('example_frame.root') + reader = Reader('root_io/example_frame.root') self.event = reader.get('events')[0] self.other_event = reader.get('other_events')[7] diff --git a/python/podio/test_ReaderRoot.py b/python/podio/test_ReaderRoot.py index ad7c8000a..44ee32157 100644 --- a/python/podio/test_ReaderRoot.py +++ b/python/podio/test_ReaderRoot.py @@ -11,11 +11,11 @@ class RootReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): """Test cases for root input files""" def setUp(self): """Setup the corresponding reader""" - self.reader = Reader('example_frame.root') + self.reader = Reader('root_io/example_frame.root') class RootLegacyReaderTestCase(LegacyReaderTestCaseMixin, unittest.TestCase): """Test cases for the legacy root input files and reader.""" def setUp(self): """Setup a reader, reading from the example files""" - self.reader = LegacyReader('example.root') + self.reader = LegacyReader('root_io/example.root') diff --git 
a/python/podio/test_ReaderSio.py b/python/podio/test_ReaderSio.py index 0429aa5d7..83489919d 100644 --- a/python/podio/test_ReaderSio.py +++ b/python/podio/test_ReaderSio.py @@ -13,7 +13,7 @@ class SioReaderTestCase(ReaderTestCaseMixin, unittest.TestCase): def setUp(self): """Setup the corresponding reader""" from podio.sio_io import Reader # pylint: disable=import-outside-toplevel - self.reader = Reader('example_frame.sio') + self.reader = Reader('sio_io/example_frame.sio') @unittest.skipIf(SKIP_SIO_TESTS, "no SIO support") @@ -22,4 +22,4 @@ class SIOLegacyReaderTestCase(LegacyReaderTestCaseMixin, unittest.TestCase): def setUp(self): """Setup a reader, reading from the example files""" from podio.sio_io import LegacyReader # pylint: disable=import-outside-toplevel - self.reader = LegacyReader('example.sio') + self.reader = LegacyReader('sio_io/example.sio') diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1b32b4101..de0d282e6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,6 +5,9 @@ foreach( _conf ${CMAKE_CONFIGURATION_TYPES} ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_${_conf} ${CMAKE_CURRENT_BINARY_DIR} ) endforeach() +# Set the podio_PYTHON_DIR manually here because the macros below expect it +SET(podio_PYTHON_DIR ${CMAKE_SOURCE_DIR}/python) + PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} OLD_DESCRIPTION datalayout_old.yaml @@ -38,319 +41,20 @@ PODIO_ADD_ROOT_IO_DICT(ExtensionDataModelDict ExtensionDataModel "${ext_headers} PODIO_ADD_SIO_IO_BLOCKS(ExtensionDataModel "${ext_headers}" "${ext_sources}") -#--- small utility helper function to allow for a more terse definition of tests below -function(CREATE_PODIO_TEST sourcefile additional_libs) - string( REPLACE ".cpp" "" name ${sourcefile} ) - add_executable( ${name} ${sourcefile} ) - add_test(NAME ${name} COMMAND ${name}) - - target_link_libraries(${name} PRIVATE TestDataModel ExtensionDataModel ${additional_libs}) - 
set_property(TEST ${name} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} - # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts - # with existing headers from other installations - ROOT_INCLUDE_PATH= - # Only pick up this build for testing - PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} - ) -endfunction() - -set(root_dependent_tests - write.cpp - read.cpp - read-multiple.cpp - relation_range.cpp - read_and_write.cpp - read_and_write_associated.cpp - write_timed.cpp - read_timed.cpp - read_frame_root.cpp - write_frame_root.cpp - read_frame_legacy_root.cpp - read_frame_root_multiple.cpp - ) -set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO) -foreach( sourcefile ${root_dependent_tests} ) - CREATE_PODIO_TEST(${sourcefile} "${root_libs}") -endforeach() - -# Avoid fetching these everytime cmake is run by caching the directory the first -# time the inputs are fetched or if the expected file does not exist in the -# expected directory -if (NOT DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR} OR NOT EXISTS ${PODIO_TEST_INPUT_DATA_DIR}/example.root) - message("Getting test input files") - execute_process( - COMMAND bash ${CMAKE_CURRENT_LIST_DIR}/scripts/get_test_inputs.sh - OUTPUT_VARIABLE podio_test_input_data_dir - RESULT_VARIABLE test_inputs_available - ) - if (NOT "${test_inputs_available}" STREQUAL "0") - message(WARNING "Could not get test input files. 
Will skip some tests that depend on these") - # Catch cases where the variable is cached but the file no longer exists - unset(PODIO_TEST_INPUT_DATA_DIR CACHE) - else() - message(STATUS "Test inputs stored in: " ${podio_test_input_data_dir}) - set(PODIO_TEST_INPUT_DATA_DIR ${podio_test_input_data_dir} CACHE INTERNAL "input dir for test inputs fetched from remote sources") - mark_as_advanced(PODIO_TEST_INPUT_DATA_DIR) - endif() -endif() - -# If the variable is cached and defined now, we have inputs and can add the -# legacy file read test -if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) - message(STATUS "Using test inputs stored in: " ${PODIO_TEST_INPUT_DATA_DIR}) - add_executable(read-legacy-files-root read-legacy-files-root.cpp) - target_link_libraries(read-legacy-files-root PRIVATE TestDataModel TestDataModelDict podio::podioRootIO) - - # Add a legacy test case based on a base executable and a version for which an - # input file exists - macro(ADD_PODIO_LEGACY_TEST version base_test input_file) - add_test(NAME ${base_test}_${version} COMMAND ${base_test} ${PODIO_TEST_INPUT_DATA_DIR}/${version}/${input_file}) - set_property(TEST ${base_test}_${version} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} - # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts - # with existing headers from other installations - ROOT_INCLUDE_PATH= - ) - endmacro() - - ADD_PODIO_LEGACY_TEST(v00-13 read-legacy-files-root example.root legacy_test_cases) - - set(legacy_versions v00-16 v00-16-05) - foreach(version IN LISTS legacy_versions) - ADD_PODIO_LEGACY_TEST(${version} read-legacy-files-root example.root legacy_test_cases) - ADD_PODIO_LEGACY_TEST(${version} read_frame_root example_frame.root legacy_test_cases) - endforeach() - -endif() - -CREATE_PODIO_TEST(ostream_operator.cpp "") -CREATE_PODIO_TEST(write_ascii.cpp "") - -if(USE_EXTERNAL_CATCH2) - if (USE_EXTERNAL_CATCH2 STREQUAL AUTO) - 
find_package(Catch2 3.1) - else() - find_package(Catch2 3.1 REQUIRED) - endif() -endif() - -if(NOT Catch2_FOUND) - message(STATUS "Fetching local copy of Catch2 library for unit-tests...") - # Build Catch2 with the default flags, to avoid generating warnings when we - # build it - set(CXX_FLAGS_CMAKE_USED ${CMAKE_CXX_FLAGS}) - set(CMAKE_CXX_FLAGS ${CXX_FLAGS_CMAKE_DEFAULTS}) - Include(FetchContent) - FetchContent_Declare( - Catch2 - GIT_REPOSITORY https://github.com/catchorg/Catch2.git - GIT_TAG v3.1.0 - ) - FetchContent_MakeAvailable(Catch2) - set(CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras ${CMAKE_MODULE_PATH}) - - # Disable clang-tidy on external contents - set_target_properties(Catch2 PROPERTIES CXX_CLANG_TIDY "") - - # Hack around the fact, that the include directories are not declared as - # SYSTEM for the targets defined this way. Otherwise warnings can still occur - # in Catch2 code when templates are evaluated (which happens quite a bit) - get_target_property(CATCH2_IF_INC_DIRS Catch2 INTERFACE_INCLUDE_DIRECTORIES) - set_target_properties(Catch2 PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${CATCH2_IF_INC_DIRS}") - - # Reset the flags - set(CMAKE_CXX_FLAGS ${CXX_FLAGS_CMAKE_USED}) -endif() - - -if (TARGET TestDataModelSioBlocks) - set(sio_dependent_tests - write_sio.cpp - read_sio.cpp - read_and_write_sio.cpp - write_timed_sio.cpp - read_timed_sio.cpp - read_frame_sio.cpp - write_frame_sio.cpp - read_frame_legacy_sio.cpp) - set(sio_libs podio::podioSioIO) - foreach( sourcefile ${sio_dependent_tests} ) - CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") - endforeach() - - # These need to be linked against TTree explicitly, since it is not done - # through another library and the TimedReader/Writer decorators are - # header-only wrappers - target_link_libraries(write_timed_sio PRIVATE ROOT::Tree) - target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) -endif() - -#--- set some dependencies between the different tests to ensure input generating ones are 
run first -set_property(TEST read PROPERTY DEPENDS write) -set_property(TEST read-multiple PROPERTY DEPENDS write) -set_property(TEST read_and_write PROPERTY DEPENDS write) -set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) -set_property(TEST read_timed PROPERTY DEPENDS write_timed) -set_property(TEST read_frame_root PROPERTY DEPENDS write_frame_root) -set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) +### Define the actual tests +PODIO_DOWNLOAD_LEGACY_INPUTS() add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree) -add_test(NAME check_benchmark_outputs COMMAND check_benchmark_outputs write_benchmark_root.root read_benchmark_root.root) -set_property(TEST check_benchmark_outputs PROPERTY DEPENDS read_timed write_timed) +add_subdirectory(root_io) +add_subdirectory(sio_io) +add_subdirectory(unittests) +add_subdirectory(dumpmodel) -if (TARGET read_sio) - set_property(TEST read_sio PROPERTY DEPENDS write_sio) - set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) - set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) - set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) - set_property(TEST read_frame_legacy_sio PROPERTY DEPENDS write_sio) - - add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) - set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) -endif() - -add_test( NAME pyunittest COMMAND python3 -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) -set_property(TEST pyunittest - PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$ENV{LD_LIBRARY_PATH} - PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} - ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} - SKIP_SIO_TESTS=$> - # Only pick 
up this build for testing - PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} - ) -set_property(TEST pyunittest PROPERTY DEPENDS write write_frame_root) -if (TARGET write_sio) - set_property(TEST pyunittest PROPERTY DEPENDS write_sio write_frame_sio) -endif() +# Tests that don't fit into one of the broad categories above +CREATE_PODIO_TEST(ostream_operator.cpp "") +CREATE_PODIO_TEST(write_ascii.cpp "") # Customize CTest to potentially disable some of the tests with known problems configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY) - -find_package(Threads REQUIRED) -add_executable(unittest unittest.cpp frame.cpp) -target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) -if (ENABLE_SIO) - target_link_libraries(unittest PRIVATE podio::podioSioIO) -endif() - -# The unittests are a bit better and they are labelled so we can put together a -# list of labels that we want to ignore -set(filter_tests "") -if (NOT FORCE_RUN_ALL_TESTS) - if(USE_SANITIZER MATCHES "Address") - set(filter_tests "~[LEAK-FAIL]~[ASAN-FAIL]") - elseif(USE_SANITIZER MATCHES "Leak") - set(filter_tests "~[LEAK-FAIL]") - elseif(USE_SANITIZER MATCHES "Thread") - set(filter_tests "~[THREAD-FAIL]") - elseif(USE_SANITIZER MATCHES "Undefined") - set(filter_tests "~[UBSAN-FAIL]") - endif() -endif() - -option(SKIP_CATCH_DISCOVERY "Skip the Catch2 test discovery" OFF) - -# To work around https://github.com/catchorg/Catch2/issues/2424 we need the -# DL_PATH argument for catch_discoer_tests which requires CMake 3.22 at least -# The whole issue can be avoied if we skip the catch test discovery and set the -# environment on our own -if (CMAKE_VERSION VERSION_LESS 3.22) - set(SKIP_CATCH_DISCOVERY ON) -endif() - -if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" OR SKIP_CATCH_DISCOVERY) - # Automatic test discovery fails with Memory sanitizers due to some issues in - # Catch2. 
So in that case we skip the discovery step and simply run the thing - # directly in the tests. - if (FORCE_RUN_ALL_TESTS OR SKIP_CATCH_DISCOVERY) - # Unfortunately Memory sanitizer seems to be really unhappy with Catch2 and - # it fails to succesfully launch the executable and execute any test. Here - # we just include them in order to have them show up as failing - add_test(NAME unittest COMMAND unittest ${filter_tests}) - set_property(TEST unittest - PROPERTY ENVIRONMENT - "LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH};PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR}" - ) - endif() -else() - include(Catch) - catch_discover_tests(unittest - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT - TEST_SPEC ${filter_tests} # discover only tests that are known to not fail - DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} - PROPERTIES - ENVIRONMENT - PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} - ) -endif() - -# Add tests for storing and retrieving the EDM definitions into the produced -# files -add_test(datamodel_def_store_roundtrip_root ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh - ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root - datamodel - ${CMAKE_CURRENT_LIST_DIR} - ) -# The extension model needs to know about the upstream model for generation -add_test(datamodel_def_store_roundtrip_root_extension - ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh - ${CMAKE_CURRENT_BINARY_DIR}/example_frame.root - extension_model - ${CMAKE_CURRENT_LIST_DIR}/extension_model - --upstream-edm=datamodel:${CMAKE_CURRENT_LIST_DIR}/datalayout.yaml - ) - -# Need the input files that are produced by other tests -set_tests_properties( - datamodel_def_store_roundtrip_root - datamodel_def_store_roundtrip_root_extension - PROPERTIES - DEPENDS write_frame_root - ) - -set(sio_roundtrip_tests "") -if (ENABLE_SIO) - 
add_test(datamodel_def_store_roundtrip_sio - ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh - ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio - datamodel - ${CMAKE_CURRENT_LIST_DIR} - ) - # The extension model needs to know about the upstream model for generation - add_test(datamodel_def_store_roundtrip_sio_extension - ${CMAKE_CURRENT_LIST_DIR}/scripts/dumpModelRoundTrip.sh - ${CMAKE_CURRENT_BINARY_DIR}/example_frame.sio - extension_model - ${CMAKE_CURRENT_LIST_DIR}/extension_model - --upstream-edm=datamodel:${CMAKE_CURRENT_LIST_DIR}/datalayout.yaml - ) - - set(sio_roundtrip_tests - datamodel_def_store_roundtrip_sio - datamodel_def_store_roundtrip_sio_extension - ) - - set_tests_properties( - ${sio_roundtrip_tests} - PROPERTIES - DEPENDS write_frame_sio - ) -endif() - -# We need to convert this into a list of arguments that can be used as environment variable -list(JOIN PODIO_IO_HANDLERS " " IO_HANDLERS) - -set_tests_properties( - datamodel_def_store_roundtrip_root - datamodel_def_store_roundtrip_root_extension - ${sio_roundtrip_tests} - PROPERTIES - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - ENVIRONMENT - "PODIO_BASE=${CMAKE_SOURCE_DIR};IO_HANDLERS=${IO_HANDLERS};ENABLE_SIO=${ENABLE_SIO};PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT};LD_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH};PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH};ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH};PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR}" - ) diff --git a/tests/dumpmodel/CMakeLists.txt b/tests/dumpmodel/CMakeLists.txt new file mode 100644 index 000000000..ccab43bc6 --- /dev/null +++ b/tests/dumpmodel/CMakeLists.txt @@ -0,0 +1,63 @@ +# Add tests for storing and retrieving the EDM definitions into the produced +# files +add_test(NAME datamodel_def_store_roundtrip_root COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + 
${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root + datamodel + ${CMAKE_SOURCE_DIR}/tests + ) +PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root) + +# The extension model needs to know about the upstream model for generation +add_test(NAME datamodel_def_store_roundtrip_root_extension COMMAND + ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root + extension_model + ${CMAKE_SOURCE_DIR}/tests/extension_model + --upstream-edm=datamodel:${CMAKE_SOURCE_DIR}/tests/datalayout.yaml + ) +PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root_extension) + +# Need the input files that are produced by other tests +set_tests_properties( + datamodel_def_store_roundtrip_root + datamodel_def_store_roundtrip_root_extension + PROPERTIES + DEPENDS write_frame_root + ) + +set(sio_roundtrip_tests "") +if (ENABLE_SIO) + add_test(NAME datamodel_def_store_roundtrip_sio COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio + datamodel + ${CMAKE_SOURCE_DIR}/tests + ) + PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio) + # The extension model needs to know about the upstream model for generation + add_test(NAME datamodel_def_store_roundtrip_sio_extension COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio + extension_model + ${CMAKE_SOURCE_DIR}/tests/extension_model + --upstream-edm=datamodel:${CMAKE_SOURCE_DIR}/tests/datalayout.yaml + ) + PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio_extension) + + set(sio_roundtrip_tests + datamodel_def_store_roundtrip_sio + datamodel_def_store_roundtrip_sio_extension + ) + + set_tests_properties( + ${sio_roundtrip_tests} + PROPERTIES + DEPENDS write_frame_sio + ) +endif() + +set_tests_properties( + datamodel_def_store_roundtrip_root + datamodel_def_store_roundtrip_root_extension + ${sio_roundtrip_tests} + PROPERTIES + WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} + ) diff --git a/tests/frame_test_common.h b/tests/frame_test_common.h new file mode 100644 index 000000000..78862b40d --- /dev/null +++ b/tests/frame_test_common.h @@ -0,0 +1,28 @@ +#ifndef PODIO_TESTS_FRAME_TEST_COMMON_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_FRAME_TEST_COMMON_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include +#include + +static const std::vector collsToWrite = {"mcparticles", + "moreMCs", + "arrays", + "mcParticleRefs", + "hits", + "hitRefs", + "refs", + "refs2", + "clusters", + "OneRelation", + "info", + "WithVectorMember", + "fixedWidthInts", + "userInts", + "userDoubles", + "WithNamespaceMember", + "WithNamespaceRelation", + "WithNamespaceRelationCopy", + "emptyCollection", + "emptySubsetColl"}; + +#endif // PODIO_TESTS_FRAME_TEST_COMMON_H diff --git a/tests/read_frame_auxiliary.h b/tests/read_frame_auxiliary.h index 66473e2c3..8ae4ff1f7 100644 --- a/tests/read_frame_auxiliary.h +++ b/tests/read_frame_auxiliary.h @@ -1,7 +1,7 @@ #ifndef PODIO_TESTS_READ_FRAME_AUXILIARY_H // NOLINT(llvm-header-guard): folder structure not suitable #define PODIO_TESTS_READ_FRAME_AUXILIARY_H // NOLINT(llvm-header-guard): folder structure not suitable -#include "write_frame.h" +#include "frame_test_common.h" #include "podio/Frame.h" #include "podio/podioVersion.h" diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt new file mode 100644 index 000000000..47efd0783 --- /dev/null +++ b/tests/root_io/CMakeLists.txt @@ -0,0 +1,61 @@ +set(root_dependent_tests + write.cpp + read.cpp + read-multiple.cpp + relation_range.cpp + read_and_write.cpp + read_and_write_associated.cpp + write_timed.cpp + read_timed.cpp + read_frame_root.cpp + write_frame_root.cpp + read_frame_legacy_root.cpp + read_frame_root_multiple.cpp + ) +set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO) +foreach( sourcefile ${root_dependent_tests} ) + 
CREATE_PODIO_TEST(${sourcefile} "${root_libs}") +endforeach() + + + +#--- set some dependencies between the different tests to ensure input generating ones are run first +set_property(TEST read PROPERTY DEPENDS write) +set_property(TEST read-multiple PROPERTY DEPENDS write) +set_property(TEST read_and_write PROPERTY DEPENDS write) +set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) +set_property(TEST read_timed PROPERTY DEPENDS write_timed) +set_property(TEST read_frame_root PROPERTY DEPENDS write_frame_root) +set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) + +add_test(NAME check_benchmark_outputs COMMAND check_benchmark_outputs write_benchmark_root.root read_benchmark_root.root) +set_property(TEST check_benchmark_outputs PROPERTY DEPENDS read_timed write_timed) + + +# If the variable is cached and defined now, we have inputs and can add the +# legacy file read test +if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) + message(STATUS "Using test inputs stored in: " ${PODIO_TEST_INPUT_DATA_DIR}) + add_executable(read-legacy-files-root read-legacy-files-root.cpp) + target_link_libraries(read-legacy-files-root PRIVATE TestDataModel TestDataModelDict podio::podioRootIO) + + # Add a legacy test case based on a base executable and a version for which an + # input file exists + macro(ADD_PODIO_LEGACY_TEST version base_test input_file) + add_test(NAME ${base_test}_${version} COMMAND ${base_test} ${PODIO_TEST_INPUT_DATA_DIR}/${version}/${input_file}) + set_property(TEST ${base_test}_${version} PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} + # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts + # with existing headers from other installations + ROOT_INCLUDE_PATH= + ) + endmacro() + + ADD_PODIO_LEGACY_TEST(v00-13 read-legacy-files-root example.root legacy_test_cases) + + set(legacy_versions v00-16 v00-16-05 PARENT_SCOPE) + foreach(version IN LISTS 
legacy_versions) + ADD_PODIO_LEGACY_TEST(${version} read-legacy-files-root example.root legacy_test_cases) + ADD_PODIO_LEGACY_TEST(${version} read_frame_root example_frame.root legacy_test_cases) + endforeach() +endif() diff --git a/tests/read-legacy-files-root.cpp b/tests/root_io/read-legacy-files-root.cpp similarity index 100% rename from tests/read-legacy-files-root.cpp rename to tests/root_io/read-legacy-files-root.cpp diff --git a/tests/read-multiple.cpp b/tests/root_io/read-multiple.cpp similarity index 100% rename from tests/read-multiple.cpp rename to tests/root_io/read-multiple.cpp diff --git a/tests/read.cpp b/tests/root_io/read.cpp similarity index 100% rename from tests/read.cpp rename to tests/root_io/read.cpp diff --git a/tests/read_and_write.cpp b/tests/root_io/read_and_write.cpp similarity index 100% rename from tests/read_and_write.cpp rename to tests/root_io/read_and_write.cpp diff --git a/tests/read_and_write_associated.cpp b/tests/root_io/read_and_write_associated.cpp similarity index 100% rename from tests/read_and_write_associated.cpp rename to tests/root_io/read_and_write_associated.cpp diff --git a/tests/read_frame_legacy_root.cpp b/tests/root_io/read_frame_legacy_root.cpp similarity index 100% rename from tests/read_frame_legacy_root.cpp rename to tests/root_io/read_frame_legacy_root.cpp diff --git a/tests/read_frame_root.cpp b/tests/root_io/read_frame_root.cpp similarity index 100% rename from tests/read_frame_root.cpp rename to tests/root_io/read_frame_root.cpp diff --git a/tests/read_frame_root_multiple.cpp b/tests/root_io/read_frame_root_multiple.cpp similarity index 100% rename from tests/read_frame_root_multiple.cpp rename to tests/root_io/read_frame_root_multiple.cpp diff --git a/tests/read_timed.cpp b/tests/root_io/read_timed.cpp similarity index 100% rename from tests/read_timed.cpp rename to tests/root_io/read_timed.cpp diff --git a/tests/relation_range.cpp b/tests/root_io/relation_range.cpp similarity index 100% rename from 
tests/relation_range.cpp rename to tests/root_io/relation_range.cpp diff --git a/tests/write.cpp b/tests/root_io/write.cpp similarity index 100% rename from tests/write.cpp rename to tests/root_io/write.cpp diff --git a/tests/write_frame_root.cpp b/tests/root_io/write_frame_root.cpp similarity index 100% rename from tests/write_frame_root.cpp rename to tests/root_io/write_frame_root.cpp diff --git a/tests/write_timed.cpp b/tests/root_io/write_timed.cpp similarity index 100% rename from tests/write_timed.cpp rename to tests/root_io/write_timed.cpp diff --git a/tests/sio_io/CMakeLists.txt b/tests/sio_io/CMakeLists.txt new file mode 100644 index 000000000..ee9a80be7 --- /dev/null +++ b/tests/sio_io/CMakeLists.txt @@ -0,0 +1,32 @@ +if (TARGET TestDataModelSioBlocks) + set(sio_dependent_tests + write_sio.cpp + read_sio.cpp + read_and_write_sio.cpp + write_timed_sio.cpp + read_timed_sio.cpp + read_frame_sio.cpp + write_frame_sio.cpp + read_frame_legacy_sio.cpp) + set(sio_libs podio::podioSioIO) + foreach( sourcefile ${sio_dependent_tests} ) + CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") + endforeach() + + # These need to be linked against TTree explicitly, since it is not done + # through another library and the TimedReader/Writer decorators are + # header-only wrappers + target_link_libraries(write_timed_sio PRIVATE ROOT::Tree) + target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) +endif() + +if (TARGET read_sio) + set_property(TEST read_sio PROPERTY DEPENDS write_sio) + set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) + set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) + set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) + set_property(TEST read_frame_legacy_sio PROPERTY DEPENDS write_sio) + + add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) + set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) 
+endif() diff --git a/tests/read_and_write_sio.cpp b/tests/sio_io/read_and_write_sio.cpp similarity index 100% rename from tests/read_and_write_sio.cpp rename to tests/sio_io/read_and_write_sio.cpp diff --git a/tests/read_frame_legacy_sio.cpp b/tests/sio_io/read_frame_legacy_sio.cpp similarity index 100% rename from tests/read_frame_legacy_sio.cpp rename to tests/sio_io/read_frame_legacy_sio.cpp diff --git a/tests/read_frame_sio.cpp b/tests/sio_io/read_frame_sio.cpp similarity index 100% rename from tests/read_frame_sio.cpp rename to tests/sio_io/read_frame_sio.cpp diff --git a/tests/read_sio.cpp b/tests/sio_io/read_sio.cpp similarity index 100% rename from tests/read_sio.cpp rename to tests/sio_io/read_sio.cpp diff --git a/tests/read_timed_sio.cpp b/tests/sio_io/read_timed_sio.cpp similarity index 100% rename from tests/read_timed_sio.cpp rename to tests/sio_io/read_timed_sio.cpp diff --git a/tests/write_frame_sio.cpp b/tests/sio_io/write_frame_sio.cpp similarity index 100% rename from tests/write_frame_sio.cpp rename to tests/sio_io/write_frame_sio.cpp diff --git a/tests/write_sio.cpp b/tests/sio_io/write_sio.cpp similarity index 100% rename from tests/write_sio.cpp rename to tests/sio_io/write_sio.cpp diff --git a/tests/write_timed_sio.cpp b/tests/sio_io/write_timed_sio.cpp similarity index 100% rename from tests/write_timed_sio.cpp rename to tests/sio_io/write_timed_sio.cpp diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt new file mode 100644 index 000000000..1103de315 --- /dev/null +++ b/tests/unittests/CMakeLists.txt @@ -0,0 +1,91 @@ +if(USE_EXTERNAL_CATCH2) + if (USE_EXTERNAL_CATCH2 STREQUAL AUTO) + find_package(Catch2 3.1) + else() + find_package(Catch2 3.1 REQUIRED) + endif() +endif() + +if(NOT Catch2_FOUND) + message(STATUS "Fetching local copy of Catch2 library for unit-tests...") + # Build Catch2 with the default flags, to avoid generating warnings when we + # build it + set(CXX_FLAGS_CMAKE_USED ${CMAKE_CXX_FLAGS}) + 
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_CMAKE_DEFAULTS}) + Include(FetchContent) + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.1.0 + ) + FetchContent_MakeAvailable(Catch2) + set(CMAKE_MODULE_PATH ${Catch2_SOURCE_DIR}/extras ${CMAKE_MODULE_PATH}) + + # Disable clang-tidy on external contents + set_target_properties(Catch2 PROPERTIES CXX_CLANG_TIDY "") + + # Hack around the fact, that the include directories are not declared as + # SYSTEM for the targets defined this way. Otherwise warnings can still occur + # in Catch2 code when templates are evaluated (which happens quite a bit) + get_target_property(CATCH2_IF_INC_DIRS Catch2 INTERFACE_INCLUDE_DIRECTORIES) + set_target_properties(Catch2 PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${CATCH2_IF_INC_DIRS}") + + # Reset the flags + set(CMAKE_CXX_FLAGS ${CXX_FLAGS_CMAKE_USED}) +endif() + +find_package(Threads REQUIRED) +add_executable(unittest unittest.cpp frame.cpp) +target_link_libraries(unittest PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) +if (ENABLE_SIO) + target_link_libraries(unittest PRIVATE podio::podioSioIO) +endif() + +# The unittests can easily be filtered and they are labelled so we can put together a +# list of labels that we want to ignore +set(filter_tests "") +if (NOT FORCE_RUN_ALL_TESTS) + if(USE_SANITIZER MATCHES "Address") + set(filter_tests "~[LEAK-FAIL]~[ASAN-FAIL]") + elseif(USE_SANITIZER MATCHES "Leak") + set(filter_tests "~[LEAK-FAIL]") + elseif(USE_SANITIZER MATCHES "Thread") + set(filter_tests "~[THREAD-FAIL]") + elseif(USE_SANITIZER MATCHES "Undefined") + set(filter_tests "~[UBSAN-FAIL]") + endif() +endif() + +option(SKIP_CATCH_DISCOVERY "Skip the Catch2 test discovery" OFF) + +# To work around https://github.com/catchorg/Catch2/issues/2424 we need the +# DL_PATH argument for catch_discoer_tests which requires CMake 3.22 at least +# The whole issue can be avoied if we skip the catch test discovery 
and set the +# environment on our own +if (CMAKE_VERSION VERSION_LESS 3.22) + set(SKIP_CATCH_DISCOVERY ON) +endif() + +if (USE_SANITIZER MATCHES "Memory(WithOrigin)?" OR SKIP_CATCH_DISCOVERY) + # Automatic test discovery fails with Memory sanitizers due to some issues in + # Catch2. So in that case we skip the discovery step and simply run the thing + # directly in the tests. + if (FORCE_RUN_ALL_TESTS OR SKIP_CATCH_DISCOVERY) + # Unfortunately Memory sanitizer seems to be really unhappy with Catch2 and + # it fails to succesfully launch the executable and execute any test. Here + # we just include them in order to have them show up as failing + add_test(NAME unittest COMMAND unittest ${filter_tests}) + PODIO_SET_TEST_ENV(unittest) + endif() +else() + include(Catch) + catch_discover_tests(unittest + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT + TEST_SPEC ${filter_tests} # discover only tests that are known to not fail + DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + PROPERTIES + ENVIRONMENT + PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} + ) +endif() diff --git a/tests/frame.cpp b/tests/unittests/frame.cpp similarity index 100% rename from tests/frame.cpp rename to tests/unittests/frame.cpp diff --git a/tests/unittest.cpp b/tests/unittests/unittest.cpp similarity index 100% rename from tests/unittest.cpp rename to tests/unittests/unittest.cpp diff --git a/tests/write_frame.h b/tests/write_frame.h index 891a029d5..8a0a2b3d3 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -1,6 +1,8 @@ #ifndef PODIO_TESTS_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable #define PODIO_TESTS_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#include "frame_test_common.h" + #include "datamodel/EventInfoCollection.h" #include "datamodel/ExampleClusterCollection.h" #include "datamodel/ExampleHitCollection.h" @@ 
-22,28 +24,6 @@ #include #include -#include - -static const std::vector collsToWrite = {"mcparticles", - "moreMCs", - "arrays", - "mcParticleRefs", - "hits", - "hitRefs", - "refs", - "refs2", - "clusters", - "OneRelation", - "info", - "WithVectorMember", - "fixedWidthInts", - "userInts", - "userDoubles", - "WithNamespaceMember", - "WithNamespaceRelation", - "WithNamespaceRelationCopy", - "emptyCollection", - "emptySubsetColl"}; auto createMCCollection() { auto mcps = ExampleMCCollection(); diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index f86e38210..4c73df578 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -15,15 +15,7 @@ if(BUILD_TESTING) if (ENABLE_SIO) set(SIO_LD_PATH $) endif() - - set_property(TEST ${name} - PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:${SIO_LD_PATH}:$ENV{LD_LIBRARY_PATH} - PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} - ROOT_INCLUDE_PATH=${CMAKE_SOURCE_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include:$ENV{ROOT_INCLUDE_PATH} - # Only pick up this build for testing - PODIO_SIOBLOCK_PATH=${CMAKE_BINARY_DIR}/tests - ) + PODIO_SET_TEST_ENV(${name}) set_tests_properties(${name} PROPERTIES DEPENDS ${depends_on} @@ -32,16 +24,16 @@ if(BUILD_TESTING) endfunction() CREATE_DUMP_TEST(podio-dump-help _dummy_target_ --help) - CREATE_DUMP_TEST(podio-dump-root-legacy "write" ${CMAKE_BINARY_DIR}/tests/example.root) - CREATE_DUMP_TEST(podio-dump-root "write_frame_root" ${CMAKE_BINARY_DIR}/tests/example_frame.root) - CREATE_DUMP_TEST(podio-dump-detailed-root "write_frame_root" --detailed --category other_events --entries 2:3 ${CMAKE_BINARY_DIR}/tests/example_frame.root) - CREATE_DUMP_TEST(podio-dump-detailed-root-legacy "write" --detailed --entries 2:3 ${CMAKE_BINARY_DIR}/tests/example.root) + CREATE_DUMP_TEST(podio-dump-root-legacy "write" ${CMAKE_BINARY_DIR}/tests/root_io/example.root) + CREATE_DUMP_TEST(podio-dump-root "write_frame_root" 
${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root) + CREATE_DUMP_TEST(podio-dump-detailed-root "write_frame_root" --detailed --category other_events --entries 2:3 ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root) + CREATE_DUMP_TEST(podio-dump-detailed-root-legacy "write" --detailed --entries 2:3 ${CMAKE_BINARY_DIR}/tests/root_io/example.root) if (ENABLE_SIO) - CREATE_DUMP_TEST(podio-dump-sio-legacy "write_sio" ${CMAKE_BINARY_DIR}/tests/example.sio) - CREATE_DUMP_TEST(podio-dump-sio "write_frame_sio" --entries 4:7 ${CMAKE_BINARY_DIR}/tests/example_frame.sio) - CREATE_DUMP_TEST(podio-dump-detailed-sio "write_frame_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/example_frame.sio) - CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/example.sio) + CREATE_DUMP_TEST(podio-dump-sio-legacy "write_sio" ${CMAKE_BINARY_DIR}/tests/sio_io/example.sio) + CREATE_DUMP_TEST(podio-dump-sio "write_frame_sio" --entries 4:7 ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio "write_frame_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/sio_io/example.sio) endif() endif() From 184c0970440556e1d2c749ec36617d24a32970e2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 23 Jun 2023 20:27:16 +0200 Subject: [PATCH 080/100] Fix crash in `ROOTLegacyReader` when reading actual legacy files (#434) * Download more legacy file versions Rework download logic to have one source of legacy versions * Add tests that read legacy files through the legacy Frame reader * Make ROOTLegacyReader handle older versions correctly --- cmake/podioTest.cmake | 4 ++-- src/ROOTLegacyReader.cc | 26 ++++++++++++++++++------ tests/CMakeLists.txt | 8 +++++++- tests/CTestCustom.cmake | 3 ++- tests/root_io/CMakeLists.txt | 4 ++-- 
tests/root_io/read_frame_legacy_root.cpp | 15 ++++++++++---- tests/scripts/get_test_inputs.sh | 2 +- 7 files changed, 45 insertions(+), 17 deletions(-) diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake index ebc966a83..f668a8d7b 100644 --- a/cmake/podioTest.cmake +++ b/cmake/podioTest.cmake @@ -28,14 +28,14 @@ function(CREATE_PODIO_TEST sourcefile additional_libs) endfunction() #--- utility macro to facilitate the downloading of legacy input data -macro(PODIO_DOWNLOAD_LEGACY_INPUTS) +macro(PODIO_DOWNLOAD_LEGACY_INPUTS legacy_versions) # Avoid fetching these everytime cmake is run by caching the directory the first # time the inputs are fetched or if the expected file does not exist in the # expected directory if (NOT DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR} OR NOT EXISTS ${PODIO_TEST_INPUT_DATA_DIR}/v00-16-05/example_frame.root) message(STATUS "Getting test input files") execute_process( - COMMAND bash ${CMAKE_SOURCE_DIR}/tests/scripts/get_test_inputs.sh + COMMAND bash ${CMAKE_SOURCE_DIR}/tests/scripts/get_test_inputs.sh ${legacy_versions} OUTPUT_VARIABLE podio_test_input_data_dir RESULT_VARIABLE test_inputs_available ) diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index 0ce48bf77..6a8b55a3b 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -123,19 +123,36 @@ void ROOTLegacyReader::openFiles(const std::vector& filenames) { auto metadatatree = static_cast(m_chain->GetFile()->Get("metadata")); m_table = std::make_shared(); auto* table = m_table.get(); - metadatatree->SetBranchAddress("CollectionIDs", &table); + auto* tableBranch = root_utils::getBranch(metadatatree, "CollectionIDs"); + tableBranch->SetAddress(&table); + tableBranch->GetEntry(0); podio::version::Version* versionPtr{nullptr}; if (auto* versionBranch = root_utils::getBranch(metadatatree, "PodioVersion")) { versionBranch->SetAddress(&versionPtr); + versionBranch->GetEntry(0); } + m_fileVersion = versionPtr ? 
*versionPtr : podio::version::Version{0, 0, 0}; + delete versionPtr; // Check if the CollectionTypeInfo branch is there and assume that the file // has been written with with podio pre #197 (<0.13.1) if that is not the case if (auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo")) { auto collectionInfo = new std::vector; - collInfoBranch->SetAddress(&collectionInfo); - metadatatree->GetEntry(0); + + if (m_fileVersion < podio::version::Version{0, 16, 4}) { + auto oldCollInfo = new std::vector(); + collInfoBranch->SetAddress(&oldCollInfo); + collInfoBranch->GetEntry(0); + collectionInfo->reserve(oldCollInfo->size()); + for (auto&& [collID, collType, isSubsetColl] : *oldCollInfo) { + collectionInfo->emplace_back(collID, std::move(collType), isSubsetColl, 1u); + } + delete oldCollInfo; + } else { + collInfoBranch->SetAddress(&collectionInfo); + collInfoBranch->GetEntry(0); + } createCollectionBranches(*collectionInfo); delete collectionInfo; } else { @@ -145,9 +162,6 @@ void ROOTLegacyReader::openFiles(const std::vector& filenames) { const auto collectionInfo = root_utils::reconstructCollectionInfo(m_chain.get(), *m_table); createCollectionBranches(collectionInfo); } - - m_fileVersion = versionPtr ? 
*versionPtr : podio::version::Version{0, 0, 0}; - delete versionPtr; } unsigned ROOTLegacyReader::getEntries(const std::string& name) const { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index de0d282e6..6591196c8 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -41,8 +41,14 @@ PODIO_ADD_ROOT_IO_DICT(ExtensionDataModelDict ExtensionDataModel "${ext_headers} PODIO_ADD_SIO_IO_BLOCKS(ExtensionDataModel "${ext_headers}" "${ext_sources}") +set(legacy_test_versions + v00-16 + v00-16-02 + v00-16-05 +) + ### Define the actual tests -PODIO_DOWNLOAD_LEGACY_INPUTS() +PODIO_DOWNLOAD_LEGACY_INPUTS("${legacy_test_versions}") add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) target_link_libraries(check_benchmark_outputs PRIVATE ROOT::Tree) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index f435ebc7c..b0e683f65 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -60,9 +60,10 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ datamodel_def_store_roundtrip_sio_extension ) - foreach(version in @legacy_versions@) + foreach(version in @legacy_test_versions@) list(APPEND CTEST_CUSTOM_TESTS_IGNORE read-legacy-files-root_${version}) list(APPEND CTEST_CUSTOM_TESTS_IGNORE read_frame_root_${version}) + list(APPEND CTEST_CUSTOM_TESTS_IGNORE read_frame_legacy_root_${version}) endforeach() # ostream_operator is working with Memory sanitizer (at least locally) diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 47efd0783..d0d8b21c4 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -53,9 +53,9 @@ if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) ADD_PODIO_LEGACY_TEST(v00-13 read-legacy-files-root example.root legacy_test_cases) - set(legacy_versions v00-16 v00-16-05 PARENT_SCOPE) - foreach(version IN LISTS legacy_versions) + foreach(version IN LISTS legacy_test_versions) ADD_PODIO_LEGACY_TEST(${version} read-legacy-files-root 
example.root legacy_test_cases) ADD_PODIO_LEGACY_TEST(${version} read_frame_root example_frame.root legacy_test_cases) + ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_root example.root legacy_test_cases) endforeach() endif() diff --git a/tests/root_io/read_frame_legacy_root.cpp b/tests/root_io/read_frame_legacy_root.cpp index f1038eabc..f6c410220 100644 --- a/tests/root_io/read_frame_legacy_root.cpp +++ b/tests/root_io/read_frame_legacy_root.cpp @@ -5,16 +5,23 @@ #include -int main() { +int main(int argc, char* argv[]) { + std::string inputFile = "example.root"; + bool assertBuildVersion = true; + if (argc == 2) { + inputFile = argv[1]; + assertBuildVersion = false; + } + auto reader = podio::ROOTLegacyReader(); try { - reader.openFile("example.root"); + reader.openFile(inputFile); } catch (const std::runtime_error& e) { - std::cout << "File could not be opened, aborting." << std::endl; + std::cout << "File (" << inputFile << ")could not be opened, aborting." << std::endl; return 1; } - if (reader.currentFileVersion() != podio::version::build_version) { + if (assertBuildVersion && reader.currentFileVersion() != podio::version::build_version) { std::cerr << "The podio build version could not be read back correctly. 
" << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" << std::endl; diff --git a/tests/scripts/get_test_inputs.sh b/tests/scripts/get_test_inputs.sh index c596ca3f7..523c3d0cc 100644 --- a/tests/scripts/get_test_inputs.sh +++ b/tests/scripts/get_test_inputs.sh @@ -11,7 +11,7 @@ cd ${PODIO_TEST_INPUT_DATA_DIR} mkdir v00-13 && cd v00-13 wget https://key4hep.web.cern.ch:443/testFiles/podio/v00-13/example.root > /dev/null 2>&1 -for version in v00-16 v00-16-05; do +for version in $@; do cd ${PODIO_TEST_INPUT_DATA_DIR} mkdir ${version} && cd ${version} for fileName in example.root example_frame.root; do From c4e11bc7c662cc13694c006a0e60425823a37c47 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 27 Jun 2023 16:02:55 +0200 Subject: [PATCH 081/100] Add schema evolution hooks to Frame (#413) * Remove unused fields * Add SchemaEvolution singleton to hold evolution functions * Inject type information into collection buffers * Inject current schema version into buffers from buffer factory * [wip] Start populating SchemaEvolution registry * [wip] Split registration into two steps Easier this way to get the current version information into the whole system * [wip] Require registration of each evolution function Remove two step registration again * [clang-tidy] Mark inputs as const& for now * Remove registry from docstrings --- include/podio/CollectionBuffers.h | 10 +- include/podio/Frame.h | 13 ++- include/podio/SchemaEvolution.h | 136 +++++++++++++++++++++++++- python/templates/Collection.cc.jinja2 | 4 + src/CMakeLists.txt | 1 + src/SchemaEvolution.cc | 78 +++++++++++++++ src/UserDataCollection.cc | 13 ++- 7 files changed, 248 insertions(+), 7 deletions(-) create mode 100644 src/SchemaEvolution.cc diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 8846af162..37ee07fe4 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -43,20 +43,22 @@ struct 
CollectionWriteBuffers { }; struct CollectionReadBuffers { - bool needsSchemaEvolution{false}; void* data{nullptr}; - void* data_oldschema{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; + SchemaVersionT schemaVersion{0}; + std::string_view type{}; using CreateFuncT = std::function(podio::CollectionReadBuffers, bool)>; using RecastFuncT = std::function; - CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, CreateFuncT&& createFunc, - RecastFuncT&& recastFunc) : + CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, SchemaVersionT version, + std::string_view typ, CreateFuncT&& createFunc, RecastFuncT&& recastFunc) : data(d), references(ref), vectorMembers(vec), + schemaVersion(version), + type(typ), createCollection(std::move(createFunc)), recast(std::move(recastFunc)) { } diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 56ffe9cc2..a71bba336 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -2,9 +2,11 @@ #define PODIO_FRAME_H #include "podio/CollectionBase.h" +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionIDTable.h" #include "podio/GenericParameters.h" #include "podio/ICollectionProvider.h" +#include "podio/SchemaEvolution.h" #include "podio/utilities/TypeHelpers.h" #include @@ -366,7 +368,16 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n buffers = unpack(m_data.get(), name); } if (buffers) { - auto coll = buffers->createCollection(buffers.value(), buffers->data == nullptr); + std::unique_ptr coll{nullptr}; + // Subset collections do not need schema evolution (by definition) + if (buffers->data == nullptr) { + coll = buffers->createCollection(buffers.value(), true); + } else { + auto evolvedBuffers = podio::SchemaEvolution::instance().evolveBuffers(buffers.value(), buffers->schemaVersion, + std::string(buffers->type)); + coll = evolvedBuffers.createCollection(evolvedBuffers, false); + } + 
coll->prepareAfterRead(); coll->setID(m_idTable.collectionID(name)); { diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index fd77fddb6..e6101b692 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -2,13 +2,147 @@ #define PODIO_SCHEMAEVOLUTION_H #include +#include +#include +#include +#include namespace podio { enum class Backend { ROOT, SIO }; +/// The type used for schema versions throughout using SchemaVersionT = uint32_t; +struct CollectionReadBuffers; + +/** + * The SchemaEvolution holds evolution functions that allow to transform + * CollectionReadBuffers of known datatypes from a previous schema version to + * the current schema version. From the evolved buffers it is then possible to + * create collections. + * + * It is implemented as a singleton that is populated at the time shared + * datamodel libraries (or their schema evolution libraries) are loaded. It is + * assumed that this happens early on in the startup of any application, such + * that the registration still happens on a single thread. After this + * initialization evolutions can be done from multiple threads. + */ +class SchemaEvolution { + /// The interface of any evolution function takes buffers and a version and + /// returns buffers. 
+ using EvolutionFuncT = std::function; + /// Each datatype gets its own version "map" where the index defines the + /// version from which the schema evolution has to start to end up in the + /// current version + using EvolFuncVersionMapT = std::vector; + + /** + * Helper struct combining the current schema version of each type and an + * index into the schema evolution "map" below + */ + struct MapIndex { + SchemaVersionT currentVersion; ///< The current schema version for each type + size_t index; ///< The index in the evolution function map + /// Tombstone value indicating that no evolution function is available (yet) + constexpr static size_t NoEvolutionAvailable = -1u; + }; + + /// The map that holds the current version for each type that is known to + /// the schema evolution + using VersionMapT = std::unordered_map; + /// The "map" that holds all evolution functions + using EvolutionMapT = std::vector; + +public: + /** + * Enum to make it possible to prioritize evolution functions during + * registration, making AutoGenerated lower priority than UserDefined + */ + enum class Priority { AutoGenerated = 0, UserDefined = 1 }; + + /// The SchemaEvolution is a singleton so we disable all copy and move + /// constructors explicitly + SchemaEvolution(const SchemaEvolution&) = delete; + SchemaEvolution& operator=(const SchemaEvolution&) = delete; + SchemaEvolution(SchemaEvolution&&) = delete; + SchemaEvolution& operator=(SchemaEvolution&&) = delete; + ~SchemaEvolution() = default; + + /// Mutable instance only used for the initial registration of functions + /// during library loading + static SchemaEvolution& mutInstance(); + /// Get the instance for evolving buffers + static SchemaEvolution const& instance(); + + /** + * Evolve the passed in buffers to the current version of the datatype that + * can be constructed from them. 
+ * + * Internally this will first check if the schema version of the buffers is + * already the current one and in that case immediately return the passed in + * buffers again as they do not need schema evolution. If that is not the case + * it will look up the correct evolution function for the passed in version + * and call that on the passed in buffers. + * + * @param oldBuffers The buffers to be evolved + * @param fromVersion The schema version of the buffers + * @param collType The fully qualified collection type + * + * @returns CollectionReadBuffers that have been evolved to the current + * version. NOTE that these could also be the input buffers. + */ + podio::CollectionReadBuffers evolveBuffers(const podio::CollectionReadBuffers& oldBuffers, SchemaVersionT fromVersion, + const std::string& collType) const; + + /** + * Register an evolution function for a given collection type and given + * versions from where to where the evolution applies. + * + * Several assumptions are in place here: + * + * - The current version has to be the same for all invocations for a given + * datatype. + * - An evolution function has to be registered for all possible versions from + * 1 to N - 1, where N is the current version + * - An evolution function can only be registered once for a given datatype and + * fromVersion + * - For auto generated code the passed in priority has to be AutoGenerated + * otherwise it might override user defined functions + * - Even if a datatype does not require schema evolution it has to register + * an evolution function (e.g. the noOpSchemaEvolution below) in order to be + * known to the internal map.
+ * + * @param collType The fully qualified collection data type + * @param fromVersion The version from which this evolution function should + * apply + * @param currentVersion The current schema version for the data type + * @param evolutionFunc The evolution function that evolves passed in buffers + * from fromVersion to currentVersion + * @param priority The priority of this evolution function. Defaults to + * UserDefined which overrides auto generated functionality. + */ + void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, SchemaVersionT currentVersion, + const EvolutionFuncT& evolutionFunc, Priority priority = Priority::UserDefined); + + /** + * A no-op schema evolution function that returns the buffers unchanged. + * + * This can be used for registering an evolution function for datatypes that + * do not require schema evolution, but need to register themselves with + * SchemaEvolution + */ + static podio::CollectionReadBuffers noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, SchemaVersionT); + +private: + SchemaEvolution() = default; + + /// The map containing types and MapIndex structs + VersionMapT m_versionMapIndices{}; + /// The "map" holding the evolution functions + EvolutionMapT m_evolutionFuncs{}; +}; + } // namespace podio -#endif \ No newline at end of file +#endif // PODIO_SCHEMAEVOLUTION_H diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 1789a83f6..88a18b305 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -4,6 +4,7 @@ // AUTOMATICALLY GENERATED FILE - DO NOT EDIT #include "podio/CollectionBufferFactory.h" +#include "podio/SchemaEvolution.h" #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" @@ -170,6 +171,8 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { namespace { podio::CollectionReadBuffers createBuffers(bool isSubset)
{ auto readBuffers = podio::CollectionReadBuffers{}; + readBuffers.type = "{{ class.full_type }}Collection"; + readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; // The number of ObjectID vectors is either 1 or the sum of OneToMany and @@ -214,6 +217,7 @@ bool registerCollection() { const static auto reg = []() { auto& factory = podio::CollectionBufferFactory::mutInstance(); factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + return true; }(); return reg; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f71864e3..394f23057 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,6 +54,7 @@ SET(core_sources UserDataCollection.cc CollectionBufferFactory.cc MurmurHash3.cpp + SchemaEvolution.cc ) SET(core_headers diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc new file mode 100644 index 000000000..2377e5833 --- /dev/null +++ b/src/SchemaEvolution.cc @@ -0,0 +1,78 @@ +#include "podio/SchemaEvolution.h" +#include "podio/CollectionBuffers.h" + +#include + +namespace podio { + +SchemaEvolution& SchemaEvolution::mutInstance() { + static SchemaEvolution instance; + return instance; +} + +SchemaEvolution const& SchemaEvolution::instance() { + return mutInstance(); +} + +podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(const podio::CollectionReadBuffers& oldBuffers, + SchemaVersionT fromVersion, + const std::string& collType) const { + if (const auto typeIt = m_versionMapIndices.find(collType); typeIt != m_versionMapIndices.end()) { + const auto [currentVersion, mapIndex] = typeIt->second; + if (fromVersion == currentVersion) { + return oldBuffers; // Nothing to do here + } + + const auto& typeEvolFuncs = m_evolutionFuncs[mapIndex]; + if (fromVersion < typeEvolFuncs.size() - 1) { + // Do we need this check? 
In principle we could ensure at registration + // time that this is always guaranteed + return typeEvolFuncs[fromVersion - 1](oldBuffers, fromVersion); + } + } + + std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << std::endl; + // TODO: exception + return oldBuffers; +} + +void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, + SchemaVersionT currentVersion, const EvolutionFuncT& evolutionFunc, + Priority priority) { + auto typeIt = m_versionMapIndices.find(collType); + if (typeIt == m_versionMapIndices.end()) { + // Create an entry for this type + std::tie(typeIt, std::ignore) = + m_versionMapIndices.emplace(collType, MapIndex{currentVersion, MapIndex::NoEvolutionAvailable}); + } + + // If we do not have any evolution funcs yet, create the necessary mapping + // structure and update the index + if (typeIt->second.index == MapIndex::NoEvolutionAvailable) { + typeIt->second.index = m_evolutionFuncs.size(); + m_evolutionFuncs.emplace_back(EvolFuncVersionMapT{}); + } + + // From here on out we don't need the mutable any longer + const auto& [_, mapIndex] = typeIt->second; + + auto& versionMap = m_evolutionFuncs[mapIndex]; + const auto prevSize = versionMap.size(); + if (prevSize < fromVersion) { + versionMap.resize(fromVersion); + versionMap[fromVersion - 1] = evolutionFunc; + } else { + if (priority == Priority::UserDefined) { + versionMap[fromVersion - 1] = evolutionFunc; + } else { + std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + } + } +} + +podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, + SchemaVersionT) { + return buffers; +} + +} // namespace podio diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index 71ea34d48..c965f5b9c 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -1,6 +1,7 @@ #include
"podio/UserDataCollection.h" #include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" +#include "podio/SchemaEvolution.h" #include #include @@ -19,7 +20,11 @@ namespace { // Register with schema version 1 to allow for potential changes CollectionBufferFactory::mutInstance().registerCreationFunc( userDataCollTypeName(), UserDataCollection::schemaVersion, [](bool) { - return podio::CollectionReadBuffers{new std::vector(), nullptr, nullptr, + return podio::CollectionReadBuffers{new std::vector(), + nullptr, + nullptr, + podio::UserDataCollection::schemaVersion, + podio::userDataCollTypeName(), [](podio::CollectionReadBuffers buffers, bool) { return std::make_unique>( std::move(*buffers.dataAsVector())); @@ -29,6 +34,12 @@ namespace { }}; }); + // For now passing the same schema version for from and current versions + // just to make SchemaEvolution aware of UserDataCollections. + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + podio::userDataCollTypeName(), UserDataCollection::schemaVersion, UserDataCollection::schemaVersion, + SchemaEvolution::noOpSchemaEvolution, SchemaEvolution::Priority::AutoGenerated); + return 1; } From d8294d2a2aad3c737c7b9f07ad9e5b2d826f50b7 Mon Sep 17 00:00:00 2001 From: Ananya Gupta <90386813+Ananya2003Gupta@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:08:16 +0530 Subject: [PATCH 082/100] Make error message from parsing more explicit (#437) --- python/podio/podio_config_reader.py | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index b6836f0d3..c3d9b4106 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -57,7 +57,7 @@ def _parse_with_regexps(string, regexps_callbacks): if result: return callback(result) - raise DefinitionError(f"'{string}' is not a valid member definition") + raise DefinitionError(f"'{string}' is not a valid member 
definition. Check syntax of the member definition.") @staticmethod def _full_array_conv(result): @@ -85,18 +85,26 @@ def _bare_member_conv(result): def parse(self, string, require_description=True): """Parse the passed string""" - matchers_cbs = [ + default_matchers_cbs = [ (self.full_array_re, self._full_array_conv), - (self.member_re, self._full_member_conv) - ] - - if not require_description: - matchers_cbs.extend(( - (self.bare_array_re, self._bare_array_conv), - (self.bare_member_re, self._bare_member_conv) - )) - - return self._parse_with_regexps(string, matchers_cbs) + (self.member_re, self._full_member_conv)] + + no_desc_matchers_cbs = [ + (self.bare_array_re, self._bare_array_conv), + (self.bare_member_re, self._bare_member_conv)] + + if require_description: + try: + return self._parse_with_regexps(string, default_matchers_cbs) + except DefinitionError: + # check whether we could parse this if we don't require a description and + # provide more details in the error if we can + self._parse_with_regexps(string, no_desc_matchers_cbs) + # pylint: disable-next=raise-missing-from + raise DefinitionError(f"'{string}' is not a valid member definition. 
Description comment is missing.\n" + "Correct Syntax: // ") + + return self._parse_with_regexps(string, default_matchers_cbs + no_desc_matchers_cbs) class ClassDefinitionValidator: From 1e8a8f9cf7fcca19da4c508cf339ddbf755057bd Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 11 Jul 2023 12:18:44 +0200 Subject: [PATCH 083/100] Add support for the new RNTuple format (#395) * Add a RNTuple writer * Cleanup and add a reader * Add compilation instructions for RNTuple * Add tests * Fix the reader and writer so that they pass most of the tests * Commit missing changes in the header * Add support for Generic Parameters * Add an ugly workaround to the unique_ptr issue * Read also vector members and remove some comments * Do a bit of cleanup * Do more cleanup, also compiler warnings * Add names in rootUtils.h, fix a few compiler warnings * Add a few minor changes * Add missing changes in the headers * Change map -> unordered_map and use append in CMakeLists.txt * Simplify writing and reading of generic parameters * Only create the ID table once * Add CollectionInfo structs * Add a ROOT version check * Add missing endif() * Add Name at the end of some names * Add missing Name at the end * Cast to rvalue * Cache entries and reserve * Add comment and remove old comments * Remove a few couts * Remove intermediate variables and use std::move * Run clang-format * Use clang-format on tests too * Enable RNTuple I/O in Key4hep CI * Check if dev3 workflows come with recent enough ROOT * Change MakeField to the new signature * Update the RNTuple reader and writer to use the buffer factory * Run clang-format * Update the RNTuple writer to use a bare model * Add friends for Generic Parameters * Update changes after the changes in the collectionID and string_view * Run clang-format * Update the reader and writer to conform to #405 * Reorganize and clean up code in the reader * Run clang-format * Simplify how the references are 
filled --------- Co-authored-by: jmcarcell Co-authored-by: tmadlener --- .github/workflows/key4hep.yml | 3 +- .github/workflows/ubuntu.yml | 7 +- CMakeLists.txt | 10 +- include/podio/CollectionBuffers.h | 1 + include/podio/GenericParameters.h | 8 + include/podio/ROOTNTupleReader.h | 104 +++++++++ include/podio/ROOTNTupleWriter.h | 74 ++++++ include/podio/UserDataCollection.h | 2 +- python/templates/CollectionData.cc.jinja2 | 1 + src/CMakeLists.txt | 18 ++ src/ROOTNTupleReader.cc | 186 +++++++++++++++ src/ROOTNTupleWriter.cc | 263 ++++++++++++++++++++++ src/rootUtils.h | 69 ++++++ tests/root_io/CMakeLists.txt | 10 + tests/root_io/read_rntuple.cpp | 6 + tests/root_io/write_rntuple.cpp | 6 + 16 files changed, 762 insertions(+), 6 deletions(-) create mode 100644 include/podio/ROOTNTupleReader.h create mode 100644 include/podio/ROOTNTupleWriter.h create mode 100644 src/ROOTNTupleReader.cc create mode 100644 src/ROOTNTupleWriter.cc create mode 100644 tests/root_io/read_rntuple.cpp create mode 100644 tests/root_io/write_rntuple.cpp diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index 92240b93e..43f69ef67 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -30,7 +30,8 @@ jobs: -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ - -DUSE_EXTERNAL_CATCH2=AUTO \ + -DUSE_EXTERNAL_CATCH2=ON \ + -DENABLE_RNTUPLE=ON \ -G Ninja .. echo "::endgroup::" echo "::group::Build" diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 16ae05bee..9814ca7cb 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -30,9 +30,10 @@ jobs: -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ - -DUSE_EXTERNAL_CATCH2=OFF \ - -DPODIO_SET_RPATH=ON \ - -G Ninja .. 
+ -DUSE_EXTERNAL_CATCH2=OFF \ + -DPODIO_SET_RPATH=ON \ + -DENABLE_RNTUPLE=ON \ + -G Ninja .. echo "::endgroup::" echo "::group::Build" ninja -k0 diff --git a/CMakeLists.txt b/CMakeLists.txt index de56ea75a..35c521882 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,12 +68,20 @@ ADD_CLANG_TIDY() option(CREATE_DOC "Whether or not to create doxygen doc target." OFF) option(ENABLE_SIO "Build SIO I/O support" OFF) option(PODIO_RELAX_PYVER "Do not require exact python version match with ROOT" OFF) +option(ENABLE_RNTUPLE "Build with support for the new ROOT NTtuple format" OFF) #--- Declare ROOT dependency --------------------------------------------------- list(APPEND CMAKE_PREFIX_PATH $ENV{ROOTSYS}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -find_package(ROOT REQUIRED COMPONENTS RIO Tree) +if(NOT ENABLE_RNTUPLE) + find_package(ROOT REQUIRED COMPONENTS RIO Tree) +else() + find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTNTuple) + if(${ROOT_VERSION} VERSION_LESS 6.28.02) + message(FATAL_ERROR "You are trying to build podio with support for the new ROOT NTuple format, but your ROOT version is too old. 
Please update ROOT to at least version 6.28.02") + endif() +endif() # Check that root is compiled with a modern enough c++ standard get_target_property(ROOT_COMPILE_FEATURES ROOT::Core INTERFACE_COMPILE_FEATURES) diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 37ee07fe4..b51161c2d 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -27,6 +27,7 @@ using VectorMembersInfo = std::vector>; */ struct CollectionWriteBuffers { void* data{nullptr}; + void* vecPtr{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 311b622b5..eeee3696f 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -18,6 +18,11 @@ class write_device; using version_type = uint32_t; // from sio/definitions } // namespace sio +namespace podio { +class ROOTNTupleReader; +class ROOTNTupleWriter; +} // namespace podio + #define DEPR_NON_TEMPLATE \ [[deprecated("Non-templated access will be removed. 
Switch to templated access functionality")]] @@ -145,6 +150,8 @@ class GenericParameters { friend void writeGenericParameters(sio::write_device& device, const GenericParameters& parameters); friend void readGenericParameters(sio::read_device& device, GenericParameters& parameters, sio::version_type version); + friend ROOTNTupleReader; + friend ROOTNTupleWriter; /// Get a reference to the internal map for a given type template @@ -187,6 +194,7 @@ class GenericParameters { } } +private: /// Get the mutex that guards the map for the given type template std::mutex& getMutex() const { diff --git a/include/podio/ROOTNTupleReader.h b/include/podio/ROOTNTupleReader.h new file mode 100644 index 000000000..a25f66f2a --- /dev/null +++ b/include/podio/ROOTNTupleReader.h @@ -0,0 +1,104 @@ +#ifndef PODIO_ROOTNTUPLEREADER_H +#define PODIO_ROOTNTUPLEREADER_H + +#include "podio/CollectionBranches.h" +#include "podio/ICollectionProvider.h" +#include "podio/ROOTFrameData.h" +#include "podio/SchemaEvolution.h" +#include "podio/podioVersion.h" +#include "podio/utilities/DatamodelRegistryIOHelpers.h" + +#include +#include +#include + +#include +#include + +namespace podio { + +/** +This class has the function to read available data from disk +and to prepare collections and buffers. +**/ +class ROOTNTupleReader { + +public: + ROOTNTupleReader() = default; + ~ROOTNTupleReader() = default; + + ROOTNTupleReader(const ROOTNTupleReader&) = delete; + ROOTNTupleReader& operator=(const ROOTNTupleReader&) = delete; + + void openFile(const std::string& filename); + void openFiles(const std::vector& filename); + + /** + * Read the next data entry from which a Frame can be constructed for the + * given name. In case there are no more entries left for this name or in + * case there is no data for this name, this returns a nullptr. + */ + std::unique_ptr readNextEntry(const std::string& name); + + /** + * Read the specified data entry from which a Frame can be constructed for + * the given name. 
In case the entry does not exist for this name or in case + * there is no data for this name, this returns a nullptr. + */ + std::unique_ptr readEntry(const std::string& name, const unsigned entry); + + /// Returns number of entries for the given name + unsigned getEntries(const std::string& name); + + /// Get the build version of podio that has been used to write the current file + podio::version::Version currentFileVersion() const { + return m_fileVersion; + } + + void closeFile(); + +private: + /** + * Initialize the given category by filling the maps with metadata information + * that will be used later + */ + bool initCategory(const std::string& category); + + /** + * Read and reconstruct the generic parameters of the Frame + */ + GenericParameters readEventMetaData(const std::string& name, unsigned entNum); + + template + void readParams(const std::string& name, unsigned entNum, GenericParameters& params); + + std::unique_ptr m_metadata{}; + + podio::version::Version m_fileVersion{}; + DatamodelDefinitionHolder m_datamodelHolder{}; + + std::unordered_map>> m_readers{}; + std::unordered_map> m_metadata_readers{}; + std::vector m_filenames{}; + + std::unordered_map m_entries{}; + std::unordered_map m_totalEntries{}; + + struct CollectionInfo { + std::vector id{}; + std::vector name{}; + std::vector type{}; + std::vector isSubsetCollection{}; + std::vector schemaVersion{}; + }; + + std::unordered_map m_collectionInfo{}; + + std::vector m_availableCategories{}; + + std::shared_ptr m_table{}; +}; + +} // namespace podio + +#endif diff --git a/include/podio/ROOTNTupleWriter.h b/include/podio/ROOTNTupleWriter.h new file mode 100644 index 000000000..0f6f6d466 --- /dev/null +++ b/include/podio/ROOTNTupleWriter.h @@ -0,0 +1,74 @@ +#ifndef PODIO_ROOTNTUPLEWRITER_H +#define PODIO_ROOTNTUPLEWRITER_H + +#include "podio/CollectionBase.h" +#include "podio/Frame.h" +#include "podio/GenericParameters.h" +#include "podio/SchemaEvolution.h" +#include 
"podio/utilities/DatamodelRegistryIOHelpers.h" + +#include "TFile.h" +#include +#include + +#include +#include +#include + +namespace podio { + +class ROOTNTupleWriter { +public: + ROOTNTupleWriter(const std::string& filename); + ~ROOTNTupleWriter(); + + ROOTNTupleWriter(const ROOTNTupleWriter&) = delete; + ROOTNTupleWriter& operator=(const ROOTNTupleWriter&) = delete; + + template + void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry); + + void writeFrame(const podio::Frame& frame, const std::string& category); + void writeFrame(const podio::Frame& frame, const std::string& category, const std::vector& collsToWrite); + void finish(); + +private: + using StoreCollection = std::pair; + std::unique_ptr createModels(const std::vector& collections); + + std::unique_ptr m_metadata{}; + std::unordered_map> m_writers{}; + std::unique_ptr m_metadataWriter{}; + + std::unique_ptr m_file{}; + + DatamodelDefinitionCollector m_datamodelCollector{}; + + struct CollectionInfo { + std::vector id{}; + std::vector name{}; + std::vector type{}; + std::vector isSubsetCollection{}; + std::vector schemaVersion{}; + }; + + std::unordered_map m_collectionInfo{}; + + std::set m_categories{}; + + bool m_finished{false}; + + std::vector m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{}; + + std::vector> m_intvalues{}; + std::vector> m_floatvalues{}; + std::vector> m_doublevalues{}; + std::vector> m_stringvalues{}; + + template + std::pair&, std::vector>&> getKeyValueVectors(); +}; + +} // namespace podio + +#endif // PODIO_ROOTNTUPLEWRITER_H diff --git a/include/podio/UserDataCollection.h b/include/podio/UserDataCollection.h index b9aefaf40..cc5b7154f 100644 --- a/include/podio/UserDataCollection.h +++ b/include/podio/UserDataCollection.h @@ -123,7 +123,7 @@ class UserDataCollection : public CollectionBase { /// Get the collection buffers for this collection podio::CollectionWriteBuffers getBuffers() override { _vecPtr = &_vec; // Set the pointer to the 
correct internal vector - return {&_vecPtr, &m_refCollections, &m_vecmem_info}; + return {&_vecPtr, _vecPtr, &m_refCollections, &m_vecmem_info}; } /// check for validity of the container after read diff --git a/python/templates/CollectionData.cc.jinja2 b/python/templates/CollectionData.cc.jinja2 index 3ae5d3a80..3946ad756 100644 --- a/python/templates/CollectionData.cc.jinja2 +++ b/python/templates/CollectionData.cc.jinja2 @@ -92,6 +92,7 @@ podio::CollectionWriteBuffers {{ class_type }}::getCollectionBuffers(bool isSubs return { isSubsetColl ? nullptr : (void*)&m_data, + isSubsetColl ? nullptr : (void*)m_data.get(), &m_refCollections, // only need to store the ObjectIDs of the referenced objects &m_vecmem_info }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 394f23057..daea12e5f 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,9 @@ FUNCTION(PODIO_ADD_LIB_AND_DICT libname headers sources selection ) $ $) target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio ROOT::Core ROOT::Tree) + if(ENABLE_RNTUPLE) + target_link_libraries(${dictname} PUBLIC ROOT::ROOTNTuple) + endif() PODIO_GENERATE_DICTIONARY(${dictname} ${headers} SELECTION ${selection} OPTIONS --library ${CMAKE_SHARED_LIBRARY_PREFIX}${dictname}${CMAKE_SHARED_LIBRARY_SUFFIX} ) @@ -83,15 +86,30 @@ SET(root_sources ROOTFrameReader.cc ROOTLegacyReader.cc ) +if(ENABLE_RNTUPLE) + list(APPEND root_sources + ROOTNTupleReader.cc + ROOTNTupleWriter.cc + ) +endif() SET(root_headers ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameReader.h ${CMAKE_SOURCE_DIR}/include/podio/ROOTLegacyReader.h ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameWriter.h ) +if(ENABLE_RNTUPLE) + list(APPEND root_headers + ${CMAKE_SOURCE_DIR}/include/podio/ROOTNTupleReader.h + ${CMAKE_SOURCE_DIR}/include/podio/ROOTNTupleWriter.h + ) +endif() PODIO_ADD_LIB_AND_DICT(podioRootIO "${root_headers}" "${root_sources}" root_selection.xml) target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO 
ROOT::Tree) +if(ENABLE_RNTUPLE) + target_link_libraries(podioRootIO PUBLIC ROOT::ROOTNTuple) +endif() # --- Python EventStore for enabling (legacy) python bindings diff --git a/src/ROOTNTupleReader.cc b/src/ROOTNTupleReader.cc new file mode 100644 index 000000000..299f88da3 --- /dev/null +++ b/src/ROOTNTupleReader.cc @@ -0,0 +1,186 @@ +#include "podio/ROOTNTupleReader.h" +#include "podio/CollectionBase.h" +#include "podio/CollectionBufferFactory.h" +#include "podio/CollectionBuffers.h" +#include "podio/CollectionIDTable.h" +#include "podio/DatamodelRegistry.h" +#include "podio/GenericParameters.h" +#include "rootUtils.h" + +#include "TClass.h" +#include +#include + +namespace podio { + +template +void ROOTNTupleReader::readParams(const std::string& name, unsigned entNum, GenericParameters& params) { + auto keyView = m_readers[name][0]->GetView>(root_utils::getGPKeyName()); + auto valueView = m_readers[name][0]->GetView>>(root_utils::getGPValueName()); + + for (size_t i = 0; i < keyView(entNum).size(); ++i) { + params.getMap().emplace(std::move(keyView(entNum)[i]), std::move(valueView(entNum)[i])); + } +} + +GenericParameters ROOTNTupleReader::readEventMetaData(const std::string& name, unsigned entNum) { + GenericParameters params; + + readParams(name, entNum, params); + readParams(name, entNum, params); + readParams(name, entNum, params); + readParams(name, entNum, params); + + return params; +} + +bool ROOTNTupleReader::initCategory(const std::string& category) { + if (std::find(m_availableCategories.begin(), m_availableCategories.end(), category) == m_availableCategories.end()) { + return false; + } + // Assume that the metadata is the same in all files + auto filename = m_filenames[0]; + + auto id = m_metadata_readers[filename]->GetView>(root_utils::idTableName(category)); + m_collectionInfo[category].id = id(0); + + auto collectionName = + m_metadata_readers[filename]->GetView>(root_utils::collectionName(category)); + m_collectionInfo[category].name = 
collectionName(0); + + auto collectionType = + m_metadata_readers[filename]->GetView>(root_utils::collInfoName(category)); + m_collectionInfo[category].type = collectionType(0); + + auto subsetCollection = + m_metadata_readers[filename]->GetView>(root_utils::subsetCollection(category)); + m_collectionInfo[category].isSubsetCollection = subsetCollection(0); + + auto schemaVersion = m_metadata_readers[filename]->GetView>("schemaVersion_" + category); + m_collectionInfo[category].schemaVersion = schemaVersion(0); + + return true; +} + +void ROOTNTupleReader::openFile(const std::string& filename) { + openFiles({filename}); +} + +void ROOTNTupleReader::openFiles(const std::vector& filenames) { + + m_filenames.insert(m_filenames.end(), filenames.begin(), filenames.end()); + for (auto& filename : filenames) { + if (m_metadata_readers.find(filename) == m_metadata_readers.end()) { + m_metadata_readers[filename] = ROOT::Experimental::RNTupleReader::Open(root_utils::metaTreeName, filename); + } + } + + m_metadata = ROOT::Experimental::RNTupleReader::Open(root_utils::metaTreeName, filenames[0]); + + auto versionView = m_metadata->GetView>(root_utils::versionBranchName); + auto version = versionView(0); + + m_fileVersion = podio::version::Version{version[0], version[1], version[2]}; + + auto edmView = m_metadata->GetView>>(root_utils::edmDefBranchName); + auto edm = edmView(0); + + auto availableCategoriesField = m_metadata->GetView>(root_utils::availableCategories); + m_availableCategories = availableCategoriesField(0); +} + +unsigned ROOTNTupleReader::getEntries(const std::string& name) { + if (m_readers.find(name) == m_readers.end()) { + for (auto& filename : m_filenames) { + try { + m_readers[name].emplace_back(ROOT::Experimental::RNTupleReader::Open(name, filename)); + } catch (const ROOT::Experimental::RException& e) { + std::cout << "Category " << name << " not found in file " << filename << std::endl; + } + } + m_totalEntries[name] = 
std::accumulate(m_readers[name].begin(), m_readers[name].end(), 0, + [](int total, auto& reader) { return total + reader->GetNEntries(); }); + } + return m_totalEntries[name]; +} + +std::unique_ptr ROOTNTupleReader::readNextEntry(const std::string& name) { + return readEntry(name, m_entries[name]); +} + +std::unique_ptr ROOTNTupleReader::readEntry(const std::string& category, const unsigned entNum) { + if (m_totalEntries.find(category) == m_totalEntries.end()) { + getEntries(category); + } + if (entNum >= m_totalEntries[category]) { + return nullptr; + } + + if (m_collectionInfo.find(category) == m_collectionInfo.end()) { + if (!initCategory(category)) { + return nullptr; + } + } + + m_entries[category] = entNum + 1; + + ROOTFrameData::BufferMap buffers; + auto dentry = m_readers[category][0]->GetModel()->GetDefaultEntry(); + + for (size_t i = 0; i < m_collectionInfo[category].id.size(); ++i) { + const auto collectionClass = TClass::GetClass(m_collectionInfo[category].type[i].c_str()); + + auto collection = + std::unique_ptr(static_cast(collectionClass->New())); + + const auto& bufferFactory = podio::CollectionBufferFactory::instance(); + auto maybeBuffers = + bufferFactory.createBuffers(m_collectionInfo[category].type[i], m_collectionInfo[category].schemaVersion[i], + m_collectionInfo[category].isSubsetCollection[i]); + auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); + + if (!maybeBuffers) { + std::cout << "WARNING: Buffers couldn't be created for collection " << m_collectionInfo[category].name[i] + << " of type " << m_collectionInfo[category].type[i] << " and schema version " + << m_collectionInfo[category].schemaVersion[i] << std::endl; + return nullptr; + } + + if (m_collectionInfo[category].isSubsetCollection[i]) { + auto brName = root_utils::subsetBranch(m_collectionInfo[category].name[i]); + auto vec = new std::vector; + dentry->CaptureValueUnsafe(brName, vec); + collBuffers.references->at(0) = std::unique_ptr>(vec); + } else { + 
dentry->CaptureValueUnsafe(m_collectionInfo[category].name[i], collBuffers.data); + + const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collection->getTypeName()); + for (size_t j = 0; j < relVecNames.relations.size(); ++j) { + const auto relName = relVecNames.relations[j]; + auto vec = new std::vector; + const auto brName = root_utils::refBranch(m_collectionInfo[category].name[i], relName); + dentry->CaptureValueUnsafe(brName, vec); + collBuffers.references->at(j) = std::unique_ptr>(vec); + } + + for (size_t j = 0; j < relVecNames.vectorMembers.size(); ++j) { + const auto vecName = relVecNames.vectorMembers[j]; + const auto brName = root_utils::vecBranch(m_collectionInfo[category].name[i], vecName); + dentry->CaptureValueUnsafe(brName, collBuffers.vectorMembers->at(j).second); + } + } + + buffers.emplace(m_collectionInfo[category].name[i], std::move(collBuffers)); + } + + m_readers[category][0]->LoadEntry(entNum); + + auto parameters = readEventMetaData(category, entNum); + if (!m_table) { + m_table = std::make_shared(m_collectionInfo[category].id, m_collectionInfo[category].name); + } + + return std::make_unique(std::move(buffers), m_table, std::move(parameters)); +} + +} // namespace podio diff --git a/src/ROOTNTupleWriter.cc b/src/ROOTNTupleWriter.cc new file mode 100644 index 000000000..741af53b3 --- /dev/null +++ b/src/ROOTNTupleWriter.cc @@ -0,0 +1,263 @@ +#include "podio/ROOTNTupleWriter.h" +#include "podio/CollectionBase.h" +#include "podio/DatamodelRegistry.h" +#include "podio/GenericParameters.h" +#include "podio/SchemaEvolution.h" +#include "podio/podioVersion.h" +#include "rootUtils.h" + +#include "TFile.h" +#include +#include +#include + +#include + +namespace podio { + +ROOTNTupleWriter::ROOTNTupleWriter(const std::string& filename) : + m_metadata(ROOT::Experimental::RNTupleModel::Create()), + m_file(new TFile(filename.c_str(), "RECREATE", "data file")) { +} + +ROOTNTupleWriter::~ROOTNTupleWriter() { + if (!m_finished) { 
+ finish(); + } +} + +template +std::pair&, std::vector>&> ROOTNTupleWriter::getKeyValueVectors() { + if constexpr (std::is_same_v) { + return {m_intkeys, m_intvalues}; + } else if constexpr (std::is_same_v) { + return {m_floatkeys, m_floatvalues}; + } else if constexpr (std::is_same_v) { + return {m_doublekeys, m_doublevalues}; + } else if constexpr (std::is_same_v) { + return {m_stringkeys, m_stringvalues}; + } else { + throw std::runtime_error("Unknown type"); + } +} + +template +void ROOTNTupleWriter::fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry) { + auto [key, value] = getKeyValueVectors(); + entry->CaptureValueUnsafe(root_utils::getGPKeyName(), &key); + entry->CaptureValueUnsafe(root_utils::getGPValueName(), &value); + + key.clear(); + key.reserve(params.getMap().size()); + value.clear(); + value.reserve(params.getMap().size()); + + for (auto& [kk, vv] : params.getMap()) { + key.emplace_back(kk); + value.emplace_back(vv); + } +} + +void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category) { + writeFrame(frame, category, frame.getAvailableCollections()); +} + +void ROOTNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category, + const std::vector& collsToWrite) { + + std::vector collections; + collections.reserve(collsToWrite.size()); + for (const auto& name : collsToWrite) { + auto* coll = frame.getCollectionForWrite(name); + collections.emplace_back(name, const_cast(coll)); + } + + bool new_category = false; + if (m_writers.find(category) == m_writers.end()) { + new_category = true; + auto model = createModels(collections); + m_writers[category] = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); + } + + auto entry = m_writers[category]->GetModel()->CreateBareEntry(); + + ROOT::Experimental::RNTupleWriteOptions options; + options.SetCompression(ROOT::RCompressionSetting::EDefaults::kUseGeneralPurpose); + + for (const auto& [name, coll] : 
collections) { + auto collBuffers = coll->getBuffers(); + if (collBuffers.vecPtr) { + entry->CaptureValueUnsafe(name, (void*)collBuffers.vecPtr); + } + + if (coll->isSubsetCollection()) { + auto& refColl = (*collBuffers.references)[0]; + const auto brName = root_utils::subsetBranch(name); + entry->CaptureValueUnsafe(brName, refColl.get()); + } else { + + const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(coll->getValueTypeName()); + if (auto refColls = collBuffers.references) { + int i = 0; + for (auto& c : (*refColls)) { + const auto brName = root_utils::refBranch(name, relVecNames.relations[i]); + entry->CaptureValueUnsafe(brName, c.get()); + ++i; + } + } + + if (auto vmInfo = collBuffers.vectorMembers) { + int i = 0; + for (auto& [type, vec] : (*vmInfo)) { + const auto typeName = "vector<" + type + ">"; + const auto brName = root_utils::vecBranch(name, relVecNames.vectorMembers[i]); + auto ptr = *(std::vector**)vec; + entry->CaptureValueUnsafe(brName, ptr); + ++i; + } + } + } + + // Not supported + // entry->CaptureValueUnsafe(root_utils::paramBranchName, + // &const_cast(frame.getParameters())); + + if (new_category) { + m_collectionInfo[category].id.emplace_back(coll->getID()); + m_collectionInfo[category].name.emplace_back(name); + m_collectionInfo[category].type.emplace_back(coll->getTypeName()); + m_collectionInfo[category].isSubsetCollection.emplace_back(coll->isSubsetCollection()); + m_collectionInfo[category].schemaVersion.emplace_back(coll->getSchemaVersion()); + } + } + + auto params = frame.getParameters(); + fillParams(params, entry.get()); + fillParams(params, entry.get()); + fillParams(params, entry.get()); + fillParams(params, entry.get()); + + m_writers[category]->Fill(*entry); + m_categories.insert(category); +} + +std::unique_ptr +ROOTNTupleWriter::createModels(const std::vector& collections) { + auto model = ROOT::Experimental::RNTupleModel::CreateBare(); + for (auto& [name, coll] : collections) { + const auto 
collBuffers = coll->getBuffers(); + + if (collBuffers.vecPtr) { + auto collClassName = "std::vector<" + std::string(coll->getDataTypeName()) + ">"; + auto field = ROOT::Experimental::Detail::RFieldBase::Create(name, collClassName).Unwrap(); + model->AddField(std::move(field)); + } + + if (coll->isSubsetCollection()) { + const auto brName = root_utils::subsetBranch(name); + auto collClassName = "vector"; + auto field = ROOT::Experimental::Detail::RFieldBase::Create(brName, collClassName).Unwrap(); + model->AddField(std::move(field)); + } else { + + const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(coll->getValueTypeName()); + if (auto refColls = collBuffers.references) { + int i = 0; + for (auto& c [[maybe_unused]] : (*refColls)) { + const auto brName = root_utils::refBranch(name, relVecNames.relations[i]); + auto collClassName = "vector"; + auto field = ROOT::Experimental::Detail::RFieldBase::Create(brName, collClassName).Unwrap(); + model->AddField(std::move(field)); + ++i; + } + } + + if (auto vminfo = collBuffers.vectorMembers) { + int i = 0; + for (auto& [type, vec] : (*vminfo)) { + const auto typeName = "vector<" + type + ">"; + const auto brName = root_utils::vecBranch(name, relVecNames.vectorMembers[i]); + auto field = ROOT::Experimental::Detail::RFieldBase::Create(brName, typeName).Unwrap(); + model->AddField(std::move(field)); + ++i; + } + } + } + } + + // Not supported by ROOT because podio::GenericParameters has map types + // so we have to split them manually + // model->MakeField(root_utils::paramBranchName); + + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::intKeyName, "std::vector>").Unwrap()); + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::floatKeyName, "std::vector>").Unwrap()); + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::doubleKeyName, "std::vector>").Unwrap()); + model->AddField( + 
ROOT::Experimental::Detail::RFieldBase::Create(root_utils::stringKeyName, "std::vector>").Unwrap()); + + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::intValueName, "std::vector>") + .Unwrap()); + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::floatValueName, "std::vector>") + .Unwrap()); + model->AddField( + ROOT::Experimental::Detail::RFieldBase::Create(root_utils::doubleValueName, "std::vector>") + .Unwrap()); + model->AddField(ROOT::Experimental::Detail::RFieldBase::Create(root_utils::stringValueName, + "std::vector>") + .Unwrap()); + + model->Freeze(); + return model; +} + +void ROOTNTupleWriter::finish() { + + auto podioVersion = podio::version::build_version; + auto versionField = m_metadata->MakeField>(root_utils::versionBranchName); + *versionField = {podioVersion.major, podioVersion.minor, podioVersion.patch}; + + auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); + auto edmField = + m_metadata->MakeField>>(root_utils::edmDefBranchName); + *edmField = edmDefinitions; + + auto availableCategoriesField = m_metadata->MakeField>(root_utils::availableCategories); + for (auto& [c, _] : m_collectionInfo) { + availableCategoriesField->push_back(c); + } + + for (auto& category : m_categories) { + auto idField = m_metadata->MakeField>({root_utils::idTableName(category)}); + *idField = m_collectionInfo[category].id; + auto collectionNameField = m_metadata->MakeField>({root_utils::collectionName(category)}); + *collectionNameField = m_collectionInfo[category].name; + auto collectionTypeField = m_metadata->MakeField>({root_utils::collInfoName(category)}); + *collectionTypeField = m_collectionInfo[category].type; + auto subsetCollectionField = m_metadata->MakeField>({root_utils::subsetCollection(category)}); + *subsetCollectionField = m_collectionInfo[category].isSubsetCollection; + auto schemaVersionField = m_metadata->MakeField>({"schemaVersion_" + category}); + 
*schemaVersionField = m_collectionInfo[category].schemaVersion; + } + + m_metadata->Freeze(); + m_metadataWriter = + ROOT::Experimental::RNTupleWriter::Append(std::move(m_metadata), root_utils::metaTreeName, *m_file, {}); + + m_metadataWriter->Fill(); + + m_file->Write(); + + // All the tuple writers must be deleted before the file so that they flush + // unwritten output + m_writers.clear(); + m_metadataWriter.reset(); + + m_finished = true; +} + +} // namespace podio diff --git a/src/rootUtils.h b/src/rootUtils.h index 507d24b15..523d5c228 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -32,6 +32,75 @@ constexpr static auto metaTreeName = "podio_metadata"; */ constexpr static auto paramBranchName = "PARAMETERS"; +/** + * Names of the fields with the keys and values of the generic parameters for + * the RNTuples until map types are supported + */ +constexpr static auto intKeyName = "GPIntKeys"; +constexpr static auto floatKeyName = "GPFloatKeys"; +constexpr static auto doubleKeyName = "GPDoubleKeys"; +constexpr static auto stringKeyName = "GPStringKeys"; + +constexpr static auto intValueName = "GPIntValues"; +constexpr static auto floatValueName = "GPFloatValues"; +constexpr static auto doubleValueName = "GPDoubleValues"; +constexpr static auto stringValueName = "GPStringValues"; + +/** + * Get the name of the key depending on the type + */ +template +constexpr auto getGPKeyName() { + if constexpr (std::is_same::value) { + return intKeyName; + } else if constexpr (std::is_same::value) { + return floatKeyName; + } else if constexpr (std::is_same::value) { + return doubleKeyName; + } else if constexpr (std::is_same::value) { + return stringKeyName; + } else { + static_assert(sizeof(T) == 0, "Unsupported type for generic parameters"); + } +} + +/** + * Get the name of the value depending on the type + */ +template +constexpr auto getGPValueName() { + if constexpr (std::is_same::value) { + return intValueName; + } else if constexpr (std::is_same::value) { + 
return floatValueName; + } else if constexpr (std::is_same::value) { + return doubleValueName; + } else if constexpr (std::is_same::value) { + return stringValueName; + } else { + static_assert(sizeof(T) == 0, "Unsupported type for generic parameters"); + } +} + +/** + * Name of the field with the list of categories for RNTuples + */ +constexpr static auto availableCategories = "availableCategories"; + +/** + * Name of the field with the names of the collections for RNTuples + */ +inline std::string collectionName(const std::string& category) { + return category + "_collectionNames"; +} + +/** + * Name of the field with the flag for subset collections for RNTuples + */ +inline std::string subsetCollection(const std::string& category) { + return category + "_isSubsetCollections"; +} + /** * The name of the branch into which we store the build version of podio at the * time of writing the file diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index d0d8b21c4..ad1537c23 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -12,6 +12,13 @@ set(root_dependent_tests read_frame_legacy_root.cpp read_frame_root_multiple.cpp ) +if(ENABLE_RNTUPLE) + set(root_dependent_tests + ${root_dependent_tests} + write_rntuple.cpp + read_rntuple.cpp + ) +endif() set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") @@ -27,6 +34,9 @@ set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) set_property(TEST read_frame_root PROPERTY DEPENDS write_frame_root) set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) +if(ENABLE_RNTUPLE) + set_property(TEST read_rntuple PROPERTY DEPENDS write_rntuple) +endif() add_test(NAME check_benchmark_outputs COMMAND check_benchmark_outputs write_benchmark_root.root read_benchmark_root.root) 
set_property(TEST check_benchmark_outputs PROPERTY DEPENDS read_timed write_timed) diff --git a/tests/root_io/read_rntuple.cpp b/tests/root_io/read_rntuple.cpp new file mode 100644 index 000000000..59688b2f2 --- /dev/null +++ b/tests/root_io/read_rntuple.cpp @@ -0,0 +1,6 @@ +#include "podio/ROOTNTupleReader.h" +#include "read_frame.h" + +int main() { + return read_frames("example_rntuple.root"); +} diff --git a/tests/root_io/write_rntuple.cpp b/tests/root_io/write_rntuple.cpp new file mode 100644 index 000000000..ce7810c53 --- /dev/null +++ b/tests/root_io/write_rntuple.cpp @@ -0,0 +1,6 @@ +#include "podio/ROOTNTupleWriter.h" +#include "write_frame.h" + +int main() { + write_frames("example_rntuple.root"); +} From 9ac7d32968d0b46da208982a478dd52e6de1b17a Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 11 Jul 2023 12:27:12 +0200 Subject: [PATCH 084/100] Allow the writers not to call finish() (#442) Co-authored-by: jmcarcell --- include/podio/ROOTFrameWriter.h | 4 +++- include/podio/SIOWriter.h | 4 +++- src/ROOTFrameWriter.cc | 8 ++++++++ src/SIOWriter.cc | 8 ++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/podio/ROOTFrameWriter.h b/include/podio/ROOTFrameWriter.h index 535b84025..addc22692 100644 --- a/include/podio/ROOTFrameWriter.h +++ b/include/podio/ROOTFrameWriter.h @@ -24,7 +24,7 @@ class GenericParameters; class ROOTFrameWriter { public: ROOTFrameWriter(const std::string& filename); - ~ROOTFrameWriter() = default; + ~ROOTFrameWriter(); ROOTFrameWriter(const ROOTFrameWriter&) = delete; ROOTFrameWriter& operator=(const ROOTFrameWriter&) = delete; @@ -83,6 +83,8 @@ class ROOTFrameWriter { std::unordered_map m_categories{}; ///< All categories DatamodelDefinitionCollector m_datamodelCollector{}; + + bool m_finished{false}; ///< Whether writing has been actually done }; } // namespace podio diff --git a/include/podio/SIOWriter.h b/include/podio/SIOWriter.h index 
29ce0bc7d..7594ae74a 100644 --- a/include/podio/SIOWriter.h +++ b/include/podio/SIOWriter.h @@ -20,7 +20,7 @@ class DEPR_EVTSTORE SIOWriter { public: SIOWriter(const std::string& filename, EventStore* store); - ~SIOWriter() = default; + ~SIOWriter(); // non-copyable SIOWriter(const SIOWriter&) = delete; @@ -46,6 +46,8 @@ class DEPR_EVTSTORE SIOWriter { std::shared_ptr m_collectionMetaData; SIOFileTOCRecord m_tocRecord{}; std::vector m_collectionsToWrite{}; + + bool m_finished{false}; }; } // namespace podio diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index e24523da7..af7126596 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -15,6 +15,12 @@ ROOTFrameWriter::ROOTFrameWriter(const std::string& filename) { m_file = std::make_unique(filename.c_str(), "recreate"); } +ROOTFrameWriter::~ROOTFrameWriter() { + if (!m_finished) { + finish(); + } +} + void ROOTFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category) { writeFrame(frame, category, frame.getAvailableCollections()); } @@ -143,6 +149,8 @@ void ROOTFrameWriter::finish() { m_file->Write(); m_file->Close(); + + m_finished = true; } } // namespace podio diff --git a/src/SIOWriter.cc b/src/SIOWriter.cc index 7cff1fa98..1c1221c13 100644 --- a/src/SIOWriter.cc +++ b/src/SIOWriter.cc @@ -34,6 +34,12 @@ SIOWriter::SIOWriter(const std::string& filename, EventStore* store) : auto& libLoader [[maybe_unused]] = SIOBlockLibraryLoader::instance(); } +SIOWriter::~SIOWriter() { + if (!m_finished) { + finish(); + } +} + void SIOWriter::writeEvent() { if (m_firstEvent) { // Write the collectionIDs as a separate record at the beginning of the @@ -86,6 +92,8 @@ void SIOWriter::finish() { m_stream.write(reinterpret_cast(&finalWords), sizeof(finalWords)); m_stream.close(); + + m_finished = true; } void SIOWriter::registerForWrite(const std::string& name) { From 6409d5835b26bc81904b84449ce5d02f5a75d7d3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 13 Jul 2023 09:36:03 
+0200 Subject: [PATCH 085/100] Make the SIOFrameWriter call to finish non-mandatory (#446) --- include/podio/SIOFrameWriter.h | 3 ++- src/SIOFrameWriter.cc | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/podio/SIOFrameWriter.h b/include/podio/SIOFrameWriter.h index a8a7d084f..c111261ca 100644 --- a/include/podio/SIOFrameWriter.h +++ b/include/podio/SIOFrameWriter.h @@ -17,7 +17,7 @@ class Frame; class SIOFrameWriter { public: SIOFrameWriter(const std::string& filename); - ~SIOFrameWriter() = default; + ~SIOFrameWriter(); SIOFrameWriter(const SIOFrameWriter&) = delete; SIOFrameWriter& operator=(const SIOFrameWriter&) = delete; @@ -37,6 +37,7 @@ class SIOFrameWriter { sio::ofstream m_stream{}; ///< The output file stream SIOFileTOCRecord m_tocRecord{}; ///< The "table of contents" of the written file DatamodelDefinitionCollector m_datamodelCollector{}; + bool m_finished{false}; ///< Has finish been called already? }; } // namespace podio diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc index 360c948d2..85f497afe 100644 --- a/src/SIOFrameWriter.cc +++ b/src/SIOFrameWriter.cc @@ -26,6 +26,12 @@ SIOFrameWriter::SIOFrameWriter(const std::string& filename) { sio_utils::writeRecord(blocks, "podio_header_info", m_stream, sizeof(podio::version::Version), false); } +SIOFrameWriter::~SIOFrameWriter() { + if (!m_finished) { + finish(); + } +} + void SIOFrameWriter::writeFrame(const podio::Frame& frame, const std::string& category) { writeFrame(frame, category, frame.getAvailableCollections()); } From 654ad98b9997aa6c5a7598ea5e661c19f58a0461 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 13 Jul 2023 09:52:34 +0200 Subject: [PATCH 086/100] Make sure to only run finish once --- src/SIOFrameWriter.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/SIOFrameWriter.cc b/src/SIOFrameWriter.cc index 85f497afe..dcc1e4c22 100644 --- a/src/SIOFrameWriter.cc +++ b/src/SIOFrameWriter.cc @@ -76,6 +76,8 @@ void SIOFrameWriter::finish() { 
m_stream.write(reinterpret_cast(&finalWords), sizeof(finalWords)); m_stream.close(); + + m_finished = true; } } // namespace podio From c13ada895df12044ec6a07fcc8410142b2b42928 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 13 Jul 2023 13:06:23 +0200 Subject: [PATCH 087/100] Remove the lcio datalayout (#448) --- lcio/datalayout.yaml | 224 ------------------------------------------- 1 file changed, 224 deletions(-) delete mode 100644 lcio/datalayout.yaml diff --git a/lcio/datalayout.yaml b/lcio/datalayout.yaml deleted file mode 100644 index a031e3c11..000000000 --- a/lcio/datalayout.yaml +++ /dev/null @@ -1,224 +0,0 @@ ---- - -# LCIO test description -# Changes w.r.t. to original: -# o no collection specific information implemented for now -# o no string member in Vertex - -datatypes : - - # LCIO RawCalorimeterHit - RawCalorimeterHit: - description: "LCIO raw calorimeter hit" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - int amplitude // The amplitude of the hit in ADC counts. - - int timeStamp // The time stamp for the hit. - - # LCIO CalorimeterHit - CalorimeterHit: - description: "LCIO calorimeter hit" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - float energy // The energy of the hit in [GeV]. - - float time // The time of the hit in [ns]. - - float[3] position // The position of the hit in world coordinates. - - int type // The type of the hit - OneToOneRelations: - - RawCalorimeterHit rawHit // The RawCalorimeterHit - - # LCIO cluster - # Changes w.r.t. to original: - # o std::bitset< 32 > _type not yet mapped - # o ParticleIDs are not implemented - # o cluster weight not properly implemented - Cluster: - description: "LCIO cluster" - author : "F.Gaede, B. 
Hegner" - members: - - float energy // Energy of the cluster - - float[3] position // Position of the cluster. - - float[6] positionError // Covariance matrix of the position (6 Parameters) - - float theta // Intrinsic direction of cluster at position - Theta. - - float phi // Intrinsic direction of cluster at position - Phi. - - float[3] directionError // Covariance matrix of the direction (3 Parameters) - - std::vector shape // Shape parameters - - std::vector weight // weight of a particular cluster - - std::vector subdetectorEnergies // A vector that holds the energy observed in a particular subdetector. - OneToManyRelations: - - Cluster clusters // The clusters that have been combined to this cluster. - - CalorimeterHit hits // The hits that have been combined to this cluster. - - # LCIO MCParticle - # Changes w.r.t. to original: - # o std::bitset< 32 > _simstatus not yet implemented - MCParticle: - description: "LCIO MC Particle" - author : "F.Gaede, B. Hegner" - members: - - int pdg // The PDG code of the particle. - - int genstatus // The status for particles as defined by the generator. - - float[3] vertex // The production vertex of the particle in [mm]. - - float charge // The particle's charge. - - float mass // The mass of the particle in [GeV] - - float time // The creation time of the particle in [ns] wrt. the event, e.g. for preassigned decays or decays in flight from the simulator. - - float[3] endpoint // The endpoint of the particle in [mm] - - bool endpointSet // Whether the endpoint has been set - - float[3] momentum // The particle's 3-momentum at the production vertex in [GeV] - OneToManyRelations: - - MCParticle parents // The parents of this particle. - - MCParticle daughters // The daughters this particle. - - # LCIO ReconstructedParticle - ReconstructedParticle: - description: "LCIO Reconstructed Particle" - author : "F.Gaede, B. Hegner" - members: - - int type // Type of reconstructed particle. 
- - float energy // Energy of the reconstructed particle. - - float[3] momentum // The reconstructed particle's 3-momentum - - float charge // The particle's charge - - float mass // The mass of the particle in [GeV] - OneToOneRelations: - - Vertex vertex // The start vertex associated to this particle. - OneToManyRelations: - - Cluster clusters // The clusters combined to this particle. - - Track tracks // The tracks combined to this particle" - - ReconstructedParticle particles // The particles combined to this particle - -#EVENT::FloatVec _cov -#float _reference [3] -#EVENT::ParticleID * _pidUsed -#float _goodnessOfPID -#EVENT::ParticleIDVec _pid - - # LCIO SimCalorimeterHit - # Changes w.r.t. to original: - # o MCParticleContribution has to become its own collection - SimCalorimeterHit: - description: "LCIO simulated calorimeter hit" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - float energy // The energy of the hit in [GeV]. - - float time // The time of the hit in [ns]. - - float[3] position // The position of the hit in world coordinates. - - # LCIO SimTrackerHit - SimTrackerHit: - description: "LCIO simulated tracker hit" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - float[3] position // The position of the hit in world coordinates. - - float time // The time of the hit in [ns]. - - float pathLength // path length - - float EDep // EDep - - float _p [3] // position? - OneToOneRelations: - - MCParticle particle // The MCParticle that caused the hit. - - - # LCIO Track - # o not yet implemented std::bitset< 32 > _type - Track: - description: "LCIO reconstructed track" - author : "F.Gaede, B. 
Hegner" - members: - - float chi2 // Chi2 - - int ndf // Number of degrees of freedom of the track fit. - - float dEdx // dEdx of the track. - - float dEdxError // Error of dEdx. - - float radiusOfInnermostHit // The radius of the innermost hit that has been used in the track fit. - - std::vector subdetectorHitNumbers // The number of hits in particular subdetectors - OnoToManyRelations: - - Track tracks // The tracks that have been combined to this track. - - TrackerHit hits // The hits that have been combined to this track. - - TrackState trackStates // Track states associated to this track. - - # LCIO TrackerData - TrackerData: - description: "LCIO tracker data" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - int time // The time of the hit. - - std::vector charge // The corrected (calibrated) FADC spectrum. - - # LCIO TrackerHit - # o no specialisation for the different kind of geometries - TrackerHit: - description : "LCIO tracker hit" - author : "F.Gaede, B. Hegner" - members : - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - int time // The time of the hit. - - float EDep // EDep - - float EDepError // error on EDep - - float[3] position // ... - - std::vector // The calibrated ADC values - -#int _type -#EVENT::FloatVec _cov -#int _quality -#EVENT::LCObjectVec _rawHits - - # LCIO TrackerPulse - TrackerPulse: - description : "LCIO tracker pulse" - author : "F. Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - int quality // ... - - float time // The time of the pulse. - - float charge // The integrated charge of the pulse - - std::vector cov // ... - OneToOneRelations: - - TrackerData corrData // ... 
- - # LCIO TrackerRawData - TrackerData: - description: "LCIO tracker raw data" - author : "F.Gaede, B. Hegner" - members: - - int cellID0 // The detector specific (geometrical) cell id. - - int cellID1 // The second detector specific (geometrical) cell id. - - int time // time measurement associated with the adc values. - - std::vector charge // The actual FADC spectrum. - - # LCIO TrackState - TrackState: - description: "LCIO track state" - author : "F.Gaede, B. Hegner" - members: - - int location // The location of the track state. - - float d0 // Impact parameter of the track in (r-phi). - - float phi // Phi of the track at the reference point. - - float omega // Omega is the signed curvature of the track in [1/mm]. - - float z0 // Impact parameter of the track in (r-z). - - float tanLambda // Lambda is the dip angle of the track in r-z at the reference point. - - float[3] referencePoint // Reference point of the track parameters - - std::vector covMatrix // Covariance matrix of the track parameters. - - # LCIO Vertex - Vertex: - description: "LCIO vertex" - author : "F.Gaede, B. Hegner" - members: - - int primary // Whether it is the primary vertex of the event - - float chi2 // Chi squared of the vertex fit. - - float probability // Probability of the vertex fit - - float[3] position // Position of the vertex. - - std::vector cov // - - std::vector par // - OneToOneRelations: - - ReconstructedParticle particle // Reconstructed Particle associated to the Vertex. 
From eb9276bcb5d3a315ffab4d5296b499a8bc16743e Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 13 Jul 2023 15:47:28 +0200 Subject: [PATCH 088/100] Fix the pre-commit workflow (#449) * Switch to a newer LCG stack with root 6.28/00 * Try with key4hep nightlies * Allow git to run necessary commands for pre-commit * Fix clang-format complaints * Fix clang-tidy complaints * Reorder some imports to make pylint happy again * Enable building RNTuple support in more workflows --- .github/workflows/pre-commit.yml | 13 +++++++++---- .github/workflows/test.yml | 14 +++++++++----- python/podio/root_io.py | 4 ++-- python/podio/sio_io.py | 4 ++-- python/podio_class_generator.py | 2 +- src/EventStore.cc | 2 +- src/ROOTReader.cc | 2 +- src/SIOBlockUserData.cc | 21 --------------------- src/SchemaEvolution.cc | 2 +- 9 files changed, 26 insertions(+), 38 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index cc4eeba90..70d60a73b 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -15,19 +15,24 @@ jobs: - uses: cvmfs-contrib/github-action-cvmfs@v3 - uses: aidasoft/run-lcg-view@v4 with: - release-platform: LCG_102/x86_64-centos7-clang12-opt + container: centos7 + view-path: /cvmfs/sw-nightlies.hsf.org/key4hep run: | echo "::group::Setup pre-commit" - export PYTHONPATH=$(python -m site --user-site):$PYTHONPATH export PATH=/root/.local/bin:$PATH + # Newer versions of git are more cautious around the github runner + # environment and without this git rev-parse --show-cdup in pre-commit + # fails + git config --global --add safe.directory $(pwd) pip install pre-commit - # Use virtualenv from the LCG release - pip uninstall --yes virtualenv + pip install pylint==2.12.2 + pip install flake8 echo "::endgroup::" echo "::group::Run CMake" mkdir build cd build cmake .. 
-DENABLE_SIO=ON \ + -DENABLE_RNTUPLE=ON \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror "\ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a57fa321a..858d20849 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,12 +13,15 @@ jobs: strategy: fail-fast: false matrix: - sio: [ON] - LCG: ["LCG_102/x86_64-centos7-clang12-opt", - "LCG_102/x86_64-centos8-gcc11-opt", - "dev3/x86_64-centos7-clang12-opt", + RNTUPLE: [ON] + LCG: ["dev3/x86_64-centos7-clang12-opt", "dev4/x86_64-centos7-gcc11-opt", "dev4/x86_64-centos7-clang12-opt"] + include: + - LCG: "LCG_102/x86_64-centos7-clang12-opt" + RNTUPLE: OFF + - LCG: "LCG_102/x86_64-centos8-gcc11-opt" + RNTUPLE: OFF steps: - uses: actions/checkout@v3 - uses: cvmfs-contrib/github-action-cvmfs@v3 @@ -29,7 +32,8 @@ jobs: echo "::group::Run CMake" mkdir build install cd build - cmake -DENABLE_SIO=${{ matrix.sio }} \ + cmake -DENABLE_SIO=ON \ + -DENABLE_RNTUPLE=${{ matrix.RNTUPLE }} \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ diff --git a/python/podio/root_io.py b/python/podio/root_io.py index 4dc6f16a7..a5f25950e 100644 --- a/python/podio/root_io.py +++ b/python/podio/root_io.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 """Python module for reading root files containing podio Frames""" -from podio.base_reader import BaseReaderMixin - from ROOT import gSystem gSystem.Load('libpodioRootIO') # noqa: E402 from ROOT import podio # noqa: E402 # pylint: disable=wrong-import-position +from podio.base_reader import BaseReaderMixin # pylint: disable=wrong-import-position + Writer = podio.ROOTFrameWriter diff --git a/python/podio/sio_io.py b/python/podio/sio_io.py index ae695fbb7..01f9d577f 100644 --- a/python/podio/sio_io.py +++ b/python/podio/sio_io.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 """Python module 
for reading sio files containing podio Frames""" -from podio.base_reader import BaseReaderMixin # pylint: disable=wrong-import-position - from ROOT import gSystem ret = gSystem.Load('libpodioSioIO') # noqa: 402 # Return values: -1 when it doesn't exist and -2 when there is a version mismatch @@ -10,6 +8,8 @@ raise ImportError('Error when importing libpodioSioIO') from ROOT import podio # noqa: 402 # pylint: disable=wrong-import-position +from podio.base_reader import BaseReaderMixin # pylint: disable=wrong-import-position + Writer = podio.SIOFrameWriter diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 4f86f2a45..5f74a5ad6 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -13,9 +13,9 @@ import jinja2 +from podio_schema_evolution import DataModelComparator # dealing with cyclic imports from podio.podio_config_reader import PodioConfigReader from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder -from podio_schema_evolution import DataModelComparator # dealing with cyclic imports THIS_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates') diff --git a/src/EventStore.cc b/src/EventStore.cc index 947d83fff..831cc7030 100644 --- a/src/EventStore.cc +++ b/src/EventStore.cc @@ -34,7 +34,7 @@ bool EventStore::get(uint32_t id, CollectionBase*& collection) const { } void EventStore::registerCollection(const std::string& name, podio::CollectionBase* coll) { - m_collections.push_back({name, coll}); + m_collections.emplace_back(name, coll); auto id = m_table->add(name); coll->setID(id); } diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 768e099f3..a366962ac 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -133,7 +133,7 @@ CollectionBase* ROOTReader::readCollectionData(const root_utils::CollectionBranc collection->setID(id); collection->prepareAfterRead(); - m_inputs.emplace_back(std::make_pair(collection, name)); + 
m_inputs.emplace_back(collection, name); return collection; } diff --git a/src/SIOBlockUserData.cc b/src/SIOBlockUserData.cc index 0c6a52381..e1318add2 100644 --- a/src/SIOBlockUserData.cc +++ b/src/SIOBlockUserData.cc @@ -1,7 +1,5 @@ #include "podio/SIOBlockUserData.h" -//#define PODIO_ADD_USER_TYPE_SIO(type) static UserDataSIOBlock _default##type##CollcetionSIOBlock ; - namespace podio { static SIOBlockUserData _defaultfloatCollcetionSIOBlock; @@ -18,22 +16,3 @@ static SIOBlockUserData _defaultuint32_tCollcetionSIOBlock; static SIOBlockUserData _defaultuint64_tCollcetionSIOBlock; } // namespace podio - -// g++ -E ../src/SIOBlockUserData.cc -// PODIO_ADD_USER_TYPE_SIO(int) -// PODIO_ADD_USER_TYPE_SIO(long) -// PODIO_ADD_USER_TYPE_SIO(float) -// PODIO_ADD_USER_TYPE_SIO(double) -// PODIO_ADD_USER_TYPE_SIO(unsigned) -// PODIO_ADD_USER_TYPE_SIO(unsigned int) -// PODIO_ADD_USER_TYPE_SIO(unsigned long) -// PODIO_ADD_USER_TYPE_SIO(char) -// PODIO_ADD_USER_TYPE_SIO(short) -// PODIO_ADD_USER_TYPE_SIO(long long) -// PODIO_ADD_USER_TYPE_SIO(unsigned long long) -// PODIO_ADD_USER_TYPE_SIO(int16_t) -// PODIO_ADD_USER_TYPE_SIO(int32_t) -// PODIO_ADD_USER_TYPE_SIO(int64_t) -// PODIO_ADD_USER_TYPE_SIO(uint16_t) -// PODIO_ADD_USER_TYPE_SIO(uint32_t) -// PODIO_ADD_USER_TYPE_SIO(uint64_t) diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 2377e5833..60b53264e 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -50,7 +50,7 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV // structure and update the index if (typeIt->second.index == MapIndex::NoEvolutionAvailable) { typeIt->second.index = m_evolutionFuncs.size(); - m_evolutionFuncs.emplace_back(EvolFuncVersionMapT{}); + m_evolutionFuncs.emplace_back(); } // From here on out we don't need the mutabale any longer From d2a895a1ae178d1ac270a2f66e355d95e0c83669 Mon Sep 17 00:00:00 2001 From: hegner Date: Thu, 13 Jul 2023 19:59:50 +0200 Subject: [PATCH 089/100] add 
description and author to components (#450) --- python/podio/podio_config_reader.py | 2 +- python/podio/test_ClassDefinitionValidator.py | 5 ----- python/templates/Component.h.jinja2 | 3 ++- tests/datalayout.yaml | 2 ++ 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/python/podio/podio_config_reader.py b/python/podio/podio_config_reader.py index c3d9b4106..4e99e2423 100644 --- a/python/podio/podio_config_reader.py +++ b/python/podio/podio_config_reader.py @@ -162,7 +162,7 @@ def _check_components(cls, datamodel, upstream_edm): """Check the components.""" for name, component in datamodel.components.items(): for field in component: - if field not in ['Members', 'ExtraCode']: + if field not in ['Members', 'ExtraCode', 'Description', 'Author']: raise DefinitionError(f"{name} defines a '{field}' field which is not allowed for a component") if 'ExtraCode' in component: diff --git a/python/podio/test_ClassDefinitionValidator.py b/python/podio/test_ClassDefinitionValidator.py index 5db93c195..c1b9d4cd1 100644 --- a/python/podio/test_ClassDefinitionValidator.py +++ b/python/podio/test_ClassDefinitionValidator.py @@ -109,11 +109,6 @@ def test_component_valid_members(self): self._assert_no_exception(DefinitionError, '{} should allow for component members in components', self.validate, make_dm(components, {}), False) - def test_component_invalid_field(self): - self.valid_component['Component']['Author'] = 'An invalid field for a component' - with self.assertRaises(DefinitionError): - self.validate(make_dm(self.valid_component, {}), False) - def test_datatype_valid_members(self): self._assert_no_exception(DefinitionError, '{} should not raise for a valid datatype', self.validate, make_dm({}, self.valid_datatype, self.def_opts)) diff --git a/python/templates/Component.h.jinja2 b/python/templates/Component.h.jinja2 index 2a9dad66a..bc82c3c92 100644 --- a/python/templates/Component.h.jinja2 +++ b/python/templates/Component.h.jinja2 @@ -1,3 +1,4 @@ +{% import 
"macros/declarations.jinja2" as macros %} {% import "macros/utils.jinja2" as utils %} // AUTOMATICALLY GENERATED FILE - DO NOT EDIT @@ -14,7 +15,7 @@ #endif {{ utils.namespace_open(class.namespace) }} - +{{ macros.class_description(class.bare_type, Description, Author) }} class {{ class.bare_type }} { public: {% for member in Members %} diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 369d39b58..cfca90388 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -23,6 +23,8 @@ components : " NotSoSimpleStruct: + Description : "A not so simple struct" + Author : "Someone" Members: - SimpleStruct data // component members can have descriptions From e38f8386ed1fdb0e0fc081a975ba24bb7e7633d2 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 14 Jul 2023 08:14:46 +0200 Subject: [PATCH 090/100] Hide the JSON output functionality from CLING (#452) --- python/templates/Collection.cc.jinja2 | 2 +- python/templates/Collection.h.jinja2 | 4 ++-- python/templates/Component.h.jinja2 | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 88a18b305..99cbd01af 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -227,7 +227,7 @@ const auto registeredCollection = registerCollection(); } // namespace -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) void to_json(nlohmann::json& j, const {{ collection_type }}& collection) { j = nlohmann::json::array(); for (auto&& elem : collection) { diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 4ef0df066..24265eef1 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -18,7 +18,7 @@ #include "podio/CollectionBase.h" #include "podio/CollectionIDTable.h" -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include 
"nlohmann/json.hpp" #endif @@ -193,7 +193,7 @@ Mutable{{ class.bare_type }} {{ class.bare_type }}Collection::create(Args&&... a return Mutable{{ class.bare_type }}(obj); } -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) void to_json(nlohmann::json& j, const {{ class.bare_type }}Collection& collection); #endif diff --git a/python/templates/Component.h.jinja2 b/python/templates/Component.h.jinja2 index bc82c3c92..bc775b362 100644 --- a/python/templates/Component.h.jinja2 +++ b/python/templates/Component.h.jinja2 @@ -10,7 +10,7 @@ {% endfor %} #include -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include "nlohmann/json.hpp" #endif @@ -40,7 +40,7 @@ inline std::ostream& operator<<(std::ostream& o, const {{class.full_type}}& valu return o; } -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) inline void to_json(nlohmann::json& j, const {{ class.bare_type }}& value) { j = nlohmann::json{ {% set comma = joiner(",") %} From ac26a122b0446fee2dcd450a6d11625bf7ee9793 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Fri, 14 Jul 2023 09:55:27 +0200 Subject: [PATCH 091/100] Rename CMAKE_{SOURCE,BIN}_DIR PROJECT_{SOURCE,BIN}_DIR (#454) --- cmake/podioCPack.cmake | 6 +++--- cmake/podioCreateConfig.cmake | 2 +- cmake/podioTest.cmake | 8 +++---- doc/Doxyfile.in | 6 +++--- python/CMakeLists.txt | 2 +- src/CMakeLists.txt | 38 +++++++++++++++++----------------- tests/CMakeLists.txt | 2 +- tests/dumpmodel/CMakeLists.txt | 20 +++++++++--------- 8 files changed, 42 insertions(+), 42 deletions(-) diff --git a/cmake/podioCPack.cmake b/cmake/podioCPack.cmake index 919acac7a..e20828ce4 100644 --- a/cmake/podioCPack.cmake +++ b/cmake/podioCPack.cmake @@ -15,9 +15,9 @@ set(CPACK_DEBIAN_PACKAGE_MAINTAINER "valentin.volkl@cern.ch") set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_PACKAGE_HOMEPAGE_URL}) set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "hep-root, 
python-yaml") -set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_SOURCE_DIR}/README.md") -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE") -set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/README.md") +set(CPACK_PACKAGE_DESCRIPTION_FILE "${PROJECT_SOURCE_DIR}/README.md") +set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE") +set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md") #--- source package settings --------------------------------------------------- set(CPACK_SOURCE_IGNORE_FILES diff --git a/cmake/podioCreateConfig.cmake b/cmake/podioCreateConfig.cmake index 74d65e9a0..0d73cd441 100644 --- a/cmake/podioCreateConfig.cmake +++ b/cmake/podioCreateConfig.cmake @@ -19,7 +19,7 @@ configure_package_config_file(${PROJECT_SOURCE_DIR}/cmake/podioConfig.cmake.in install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/podioConfigVersion.cmake - ${CMAKE_SOURCE_DIR}/cmake/podioMacros.cmake + ${PROJECT_SOURCE_DIR}/cmake/podioMacros.cmake DESTINATION ${CMAKE_INSTALL_CMAKEDIR}/${PROJECT_NAME} ) install(EXPORT podioTargets DESTINATION ${CMAKE_INSTALL_CMAKEDIR}/${PROJECT_NAME} diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake index f668a8d7b..0515a45fa 100644 --- a/cmake/podioTest.cmake +++ b/cmake/podioTest.cmake @@ -6,13 +6,13 @@ function(PODIO_SET_TEST_ENV test) set_property(TEST ${test} PROPERTY ENVIRONMENT LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} - PYTHONPATH=${CMAKE_SOURCE_DIR}/python:$ENV{PYTHONPATH} + PYTHONPATH=${PROJECT_SOURCE_DIR}/python:$ENV{PYTHONPATH} PODIO_SIOBLOCK_PATH=${CMAKE_BINARY_DIR}/tests - ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/datamodel:${CMAKE_SOURCE_DIR}/include + ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/datamodel:${PROJECT_SOURCE_DIR}/include SKIP_SIO_TESTS=$> IO_HANDLERS=${IO_HANDLERS} PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT} - PODIO_BASE=${CMAKE_SOURCE_DIR} + PODIO_BASE=${PROJECT_SOURCE_DIR} 
ENABLE_SIO=${ENABLE_SIO} ) endfunction() @@ -35,7 +35,7 @@ macro(PODIO_DOWNLOAD_LEGACY_INPUTS legacy_versions) if (NOT DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR} OR NOT EXISTS ${PODIO_TEST_INPUT_DATA_DIR}/v00-16-05/example_frame.root) message(STATUS "Getting test input files") execute_process( - COMMAND bash ${CMAKE_SOURCE_DIR}/tests/scripts/get_test_inputs.sh ${legacy_versions} + COMMAND bash ${PROJECT_SOURCE_DIR}/tests/scripts/get_test_inputs.sh ${legacy_versions} OUTPUT_VARIABLE podio_test_input_data_dir RESULT_VARIABLE test_inputs_available ) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 2de868d7e..b0f601f8f 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -144,7 +144,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = @CMAKE_SOURCE_DIR@ @CMAKE_BINARY_DIR@ +STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@ @CMAKE_BINARY_DIR@ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -743,7 +743,7 @@ WARN_LOGFILE = @CMAKE_BINARY_DIR@/doxygen-warnings.log # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @CMAKE_SOURCE_DIR@ +INPUT = @PROJECT_SOURCE_DIR@ INPUT += @CMAKE_BINARY_DIR@/include INPUT += @CMAKE_CURRENT_BINARY_DIR@ @@ -818,7 +818,7 @@ EXCLUDE_SYMBOLS = # that contain example code fragments that are included (see the \include # command). 
-EXAMPLE_PATH = @CMAKE_SOURCE_DIR@ +EXAMPLE_PATH = @PROJECT_SOURCE_DIR@ # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 9a7e39bd7..eea3a105a 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -28,7 +28,7 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/templates DESTINATION ${podio_PYTHON_INSTALLDIR}) IF (BUILD_TESTING) - add_test( NAME pyunittest COMMAND python3 -m unittest discover -s ${CMAKE_SOURCE_DIR}/python/podio) + add_test( NAME pyunittest COMMAND python3 -m unittest discover -s ${PROJECT_SOURCE_DIR}/python/podio) PODIO_SET_TEST_ENV(pyunittest) set_property(TEST pyunittest PROPERTY DEPENDS write write_frame_root) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index daea12e5f..8cdd0d813 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -61,16 +61,16 @@ SET(core_sources ) SET(core_headers - ${CMAKE_SOURCE_DIR}/include/podio/CollectionBase.h - ${CMAKE_SOURCE_DIR}/include/podio/CollectionIDTable.h - ${CMAKE_SOURCE_DIR}/include/podio/EventStore.h - ${CMAKE_SOURCE_DIR}/include/podio/ICollectionProvider.h - ${CMAKE_SOURCE_DIR}/include/podio/IReader.h - ${CMAKE_SOURCE_DIR}/include/podio/ObjectID.h - ${CMAKE_SOURCE_DIR}/include/podio/UserDataCollection.h - ${CMAKE_SOURCE_DIR}/include/podio/podioVersion.h - ${CMAKE_SOURCE_DIR}/include/podio/DatamodelRegistry.h - ${CMAKE_SOURCE_DIR}/include/podio/utilities/DatamodelRegistryIOHelpers.h + ${PROJECT_SOURCE_DIR}/include/podio/CollectionBase.h + ${PROJECT_SOURCE_DIR}/include/podio/CollectionIDTable.h + ${PROJECT_SOURCE_DIR}/include/podio/EventStore.h + ${PROJECT_SOURCE_DIR}/include/podio/ICollectionProvider.h + ${PROJECT_SOURCE_DIR}/include/podio/IReader.h + ${PROJECT_SOURCE_DIR}/include/podio/ObjectID.h + ${PROJECT_SOURCE_DIR}/include/podio/UserDataCollection.h + ${PROJECT_SOURCE_DIR}/include/podio/podioVersion.h + 
${PROJECT_SOURCE_DIR}/include/podio/DatamodelRegistry.h + ${PROJECT_SOURCE_DIR}/include/podio/utilities/DatamodelRegistryIOHelpers.h ) PODIO_ADD_LIB_AND_DICT(podio "${core_headers}" "${core_sources}" selection.xml) @@ -94,14 +94,14 @@ if(ENABLE_RNTUPLE) endif() SET(root_headers - ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameReader.h - ${CMAKE_SOURCE_DIR}/include/podio/ROOTLegacyReader.h - ${CMAKE_SOURCE_DIR}/include/podio/ROOTFrameWriter.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameReader.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameWriter.h ) if(ENABLE_RNTUPLE) list(APPEND root_headers - ${CMAKE_SOURCE_DIR}/include/podio/ROOTNTupleReader.h - ${CMAKE_SOURCE_DIR}/include/podio/ROOTNTupleWriter.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTNTupleReader.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTNTupleWriter.h ) endif() @@ -119,7 +119,7 @@ SET(python_sources ) SET(python_headers - ${CMAKE_SOURCE_DIR}/include/podio/PythonEventStore.h + ${PROJECT_SOURCE_DIR}/include/podio/PythonEventStore.h ) PODIO_ADD_LIB_AND_DICT(podioPythonStore "${python_headers}" "${python_sources}" python_selection.xml) target_link_libraries(podioPythonStore PUBLIC podio::podio) @@ -140,9 +140,9 @@ if(ENABLE_SIO) ) SET(sio_headers - ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameReader.h - ${CMAKE_SOURCE_DIR}/include/podio/SIOLegacyReader.h - ${CMAKE_SOURCE_DIR}/include/podio/SIOFrameWriter.h + ${PROJECT_SOURCE_DIR}/include/podio/SIOFrameReader.h + ${PROJECT_SOURCE_DIR}/include/podio/SIOLegacyReader.h + ${PROJECT_SOURCE_DIR}/include/podio/SIOFrameWriter.h ) PODIO_ADD_LIB_AND_DICT(podioSioIO "${sio_headers}" "${sio_sources}" sio_selection.xml) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6591196c8..60b2bdd17 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,7 @@ foreach( _conf ${CMAKE_CONFIGURATION_TYPES} ) endforeach() # Set the podio_PYTHON_DIR manually here because the macros below expect it 
-SET(podio_PYTHON_DIR ${CMAKE_SOURCE_DIR}/python) +SET(podio_PYTHON_DIR ${PROJECT_SOURCE_DIR}/python) PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} diff --git a/tests/dumpmodel/CMakeLists.txt b/tests/dumpmodel/CMakeLists.txt index ccab43bc6..d796325cb 100644 --- a/tests/dumpmodel/CMakeLists.txt +++ b/tests/dumpmodel/CMakeLists.txt @@ -1,19 +1,19 @@ # Add tests for storing and retrieving the EDM definitions into the produced # files -add_test(NAME datamodel_def_store_roundtrip_root COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh +add_test(NAME datamodel_def_store_roundtrip_root COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root datamodel - ${CMAKE_SOURCE_DIR}/tests + ${PROJECT_SOURCE_DIR}/tests ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root) # The extension model needs to know about the upstream model for generation add_test(NAME datamodel_def_store_roundtrip_root_extension COMMAND - ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root extension_model - ${CMAKE_SOURCE_DIR}/tests/extension_model - --upstream-edm=datamodel:${CMAKE_SOURCE_DIR}/tests/datalayout.yaml + ${PROJECT_SOURCE_DIR}/tests/extension_model + --upstream-edm=datamodel:${PROJECT_SOURCE_DIR}/tests/datalayout.yaml ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root_extension) @@ -27,18 +27,18 @@ set_tests_properties( set(sio_roundtrip_tests "") if (ENABLE_SIO) - add_test(NAME datamodel_def_store_roundtrip_sio COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + add_test(NAME datamodel_def_store_roundtrip_sio COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio datamodel - ${CMAKE_SOURCE_DIR}/tests + ${PROJECT_SOURCE_DIR}/tests ) 
PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio) # The extension model needs to know about the upstream model for generation - add_test(NAME datamodel_def_store_roundtrip_sio_extension COMMAND ${CMAKE_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh + add_test(NAME datamodel_def_store_roundtrip_sio_extension COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio extension_model - ${CMAKE_SOURCE_DIR}/tests/extension_model - --upstream-edm=datamodel:${CMAKE_SOURCE_DIR}/tests/datalayout.yaml + ${PROJECT_SOURCE_DIR}/tests/extension_model + --upstream-edm=datamodel:${PROJECT_SOURCE_DIR}/tests/datalayout.yaml ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio_extension) From ace49f71ff16a4475bc16a2b3edaf01e730dcb9d Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 18 Jul 2023 11:33:52 +0200 Subject: [PATCH 092/100] Rename CMAKE_BINARY_DIR to PROJECT_BINARY_DIR (#455) Co-authored-by: jmcarcell --- cmake/podioDoxygen.cmake | 2 +- cmake/podioMacros.cmake | 2 +- cmake/podioTest.cmake | 6 +++--- doc/Doxyfile.in | 12 ++++++------ python/CMakeLists.txt | 2 +- tests/CMakeLists.txt | 2 +- tests/dumpmodel/CMakeLists.txt | 8 ++++---- tests/root_io/CMakeLists.txt | 2 +- tests/unittests/CMakeLists.txt | 2 +- tools/CMakeLists.txt | 16 ++++++++-------- 10 files changed, 27 insertions(+), 27 deletions(-) diff --git a/cmake/podioDoxygen.cmake b/cmake/podioDoxygen.cmake index 4a472b154..c8e09b9e1 100644 --- a/cmake/podioDoxygen.cmake +++ b/cmake/podioDoxygen.cmake @@ -8,7 +8,7 @@ if(DOXYGEN_FOUND) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doc/doxy-boot.js.in - ${CMAKE_BINARY_DIR}/doxygen/html/doxy-boot.js) + ${PROJECT_BINARY_DIR}/doxygen/html/doxy-boot.js) add_custom_target(doc ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile WORKING_DIRECTORY 
${CMAKE_CURRENT_BINARY_DIR} diff --git a/cmake/podioMacros.cmake b/cmake/podioMacros.cmake index bb217801c..cf8124d18 100644 --- a/cmake/podioMacros.cmake +++ b/cmake/podioMacros.cmake @@ -368,7 +368,7 @@ function(PODIO_CHECK_CPP_FS FS_LIBS) # After that it should be built-in FOREACH(FS_LIB_NAME "" stdc++fs c++fs) # MESSAGE(STATUS "Linking against ${FS_LIB_NAME}") - try_compile(have_filesystem ${CMAKE_BINARY_DIR}/try ${PROJECT_SOURCE_DIR}/cmake/try_filesystem.cpp + try_compile(have_filesystem ${PROJECT_BINARY_DIR}/try ${PROJECT_SOURCE_DIR}/cmake/try_filesystem.cpp CXX_STANDARD ${CMAKE_CXX_STANDARD} CXX_EXTENSIONS False OUTPUT_VARIABLE HAVE_FS_OUTPUT diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake index 0515a45fa..5230af7a3 100644 --- a/cmake/podioTest.cmake +++ b/cmake/podioTest.cmake @@ -5,10 +5,10 @@ function(PODIO_SET_TEST_ENV test) list(JOIN PODIO_IO_HANDLERS " " IO_HANDLERS) set_property(TEST ${test} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/tests:${PROJECT_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PYTHONPATH=${PROJECT_SOURCE_DIR}/python:$ENV{PYTHONPATH} - PODIO_SIOBLOCK_PATH=${CMAKE_BINARY_DIR}/tests - ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/datamodel:${PROJECT_SOURCE_DIR}/include + PODIO_SIOBLOCK_PATH=${PROJECT_BINARY_DIR}/tests + ROOT_INCLUDE_PATH=${PROJECT_BINARY_DIR}/tests/datamodel:${PROJECT_SOURCE_DIR}/include SKIP_SIO_TESTS=$> IO_HANDLERS=${IO_HANDLERS} PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT} diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index b0f601f8f..d64d9bcda 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. 
-OUTPUT_DIRECTORY = @CMAKE_BINARY_DIR@/doxygen +OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doxygen # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and @@ -144,7 +144,7 @@ FULL_PATH_NAMES = YES # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. -STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@ @CMAKE_BINARY_DIR@ +STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@ @PROJECT_BINARY_DIR@ # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which @@ -153,7 +153,7 @@ STRIP_FROM_PATH = @PROJECT_SOURCE_DIR@ @CMAKE_BINARY_DIR@ # specify the list of include paths that are normally passed to the compiler # using the -I flag. -STRIP_FROM_INC_PATH = @DOXYGEN_INCLUDE_DIRS@ @CMAKE_BINARY_DIR@/include +STRIP_FROM_INC_PATH = @DOXYGEN_INCLUDE_DIRS@ @PROJECT_BINARY_DIR@/include # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't @@ -731,7 +731,7 @@ WARN_FORMAT = "$file:$line: $text" # messages should be written. If left blank the output is written to standard # error (stderr). -WARN_LOGFILE = @CMAKE_BINARY_DIR@/doxygen-warnings.log +WARN_LOGFILE = @PROJECT_BINARY_DIR@/doxygen-warnings.log #--------------------------------------------------------------------------- # Configuration options related to the input files @@ -744,7 +744,7 @@ WARN_LOGFILE = @CMAKE_BINARY_DIR@/doxygen-warnings.log # Note: If this tag is empty the current directory is searched. 
INPUT = @PROJECT_SOURCE_DIR@ -INPUT += @CMAKE_BINARY_DIR@/include +INPUT += @PROJECT_BINARY_DIR@/include INPUT += @CMAKE_CURRENT_BINARY_DIR@ # This tag can be used to specify the character encoding of the source files @@ -801,7 +801,7 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = */tests/* */dict/* */cmake/* @CMAKE_BINARY_DIR@ +EXCLUDE_PATTERNS = */tests/* */dict/* */cmake/* @PROJECT_BINARY_DIR@ # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index eea3a105a..a17b07748 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -35,5 +35,5 @@ IF (BUILD_TESTING) if (TARGET write_sio) set_property(TEST pyunittest PROPERTY DEPENDS write_sio write_frame_sio) endif() - set_property(TEST pyunittest PROPERTY WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/tests) + set_property(TEST pyunittest PROPERTY WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/tests) ENDIF() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 60b2bdd17..6e71fa2ee 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -63,4 +63,4 @@ CREATE_PODIO_TEST(ostream_operator.cpp "") CREATE_PODIO_TEST(write_ascii.cpp "") # Customize CTest to potentially disable some of the tests with known problems -configure_file(CTestCustom.cmake ${CMAKE_BINARY_DIR}/CTestCustom.cmake @ONLY) +configure_file(CTestCustom.cmake ${PROJECT_BINARY_DIR}/CTestCustom.cmake @ONLY) diff --git a/tests/dumpmodel/CMakeLists.txt b/tests/dumpmodel/CMakeLists.txt index d796325cb..345609210 100644 --- a/tests/dumpmodel/CMakeLists.txt +++ b/tests/dumpmodel/CMakeLists.txt @@ -1,7 +1,7 @@ # Add tests for storing and retrieving the EDM definitions into the produced # files add_test(NAME datamodel_def_store_roundtrip_root COMMAND 
${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh - ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root + ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root datamodel ${PROJECT_SOURCE_DIR}/tests ) @@ -10,7 +10,7 @@ PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_root) # The extension model needs to know about the upstream model for generation add_test(NAME datamodel_def_store_roundtrip_root_extension COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh - ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root + ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root extension_model ${PROJECT_SOURCE_DIR}/tests/extension_model --upstream-edm=datamodel:${PROJECT_SOURCE_DIR}/tests/datalayout.yaml @@ -28,14 +28,14 @@ set_tests_properties( set(sio_roundtrip_tests "") if (ENABLE_SIO) add_test(NAME datamodel_def_store_roundtrip_sio COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh - ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio + ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio datamodel ${PROJECT_SOURCE_DIR}/tests ) PODIO_SET_TEST_ENV(datamodel_def_store_roundtrip_sio) # The extension model needs to know about the upstream model for generation add_test(NAME datamodel_def_store_roundtrip_sio_extension COMMAND ${PROJECT_SOURCE_DIR}/tests/scripts/dumpModelRoundTrip.sh - ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio + ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio extension_model ${PROJECT_SOURCE_DIR}/tests/extension_model --upstream-edm=datamodel:${PROJECT_SOURCE_DIR}/tests/datalayout.yaml diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index ad1537c23..bfa8309f3 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -54,7 +54,7 @@ if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) macro(ADD_PODIO_LEGACY_TEST version base_test input_file) add_test(NAME ${base_test}_${version} COMMAND ${base_test} ${PODIO_TEST_INPUT_DATA_DIR}/${version}/${input_file}) set_property(TEST 
${base_test}_${version} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/tests:${CMAKE_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/tests:${PROJECT_BINARY_DIR}/src:$ENV{LD_LIBRARY_PATH} # Clear the ROOT_INCLUDE_PATH for the tests, to avoid potential conflicts # with existing headers from other installations ROOT_INCLUDE_PATH= diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index 1103de315..04f1e019a 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -83,7 +83,7 @@ else() WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} TEST_PREFIX "UT_" # make it possible to filter easily with -R ^UT TEST_SPEC ${filter_tests} # discover only tests that are known to not fail - DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${CMAKE_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + DL_PATHS ${CMAKE_CURRENT_BINARY_DIR}:${PROJECT_BINARY_DIR}/src:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PROPERTIES ENVIRONMENT PODIO_SIOBLOCK_PATH=${CMAKE_CURRENT_BINARY_DIR} diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 4c73df578..beb60e318 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -24,16 +24,16 @@ if(BUILD_TESTING) endfunction() CREATE_DUMP_TEST(podio-dump-help _dummy_target_ --help) - CREATE_DUMP_TEST(podio-dump-root-legacy "write" ${CMAKE_BINARY_DIR}/tests/root_io/example.root) - CREATE_DUMP_TEST(podio-dump-root "write_frame_root" ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root) - CREATE_DUMP_TEST(podio-dump-detailed-root "write_frame_root" --detailed --category other_events --entries 2:3 ${CMAKE_BINARY_DIR}/tests/root_io/example_frame.root) - CREATE_DUMP_TEST(podio-dump-detailed-root-legacy "write" --detailed --entries 2:3 ${CMAKE_BINARY_DIR}/tests/root_io/example.root) + CREATE_DUMP_TEST(podio-dump-root-legacy "write" ${PROJECT_BINARY_DIR}/tests/root_io/example.root) + CREATE_DUMP_TEST(podio-dump-root "write_frame_root" ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root) + 
CREATE_DUMP_TEST(podio-dump-detailed-root "write_frame_root" --detailed --category other_events --entries 2:3 ${PROJECT_BINARY_DIR}/tests/root_io/example_frame.root) + CREATE_DUMP_TEST(podio-dump-detailed-root-legacy "write" --detailed --entries 2:3 ${PROJECT_BINARY_DIR}/tests/root_io/example.root) if (ENABLE_SIO) - CREATE_DUMP_TEST(podio-dump-sio-legacy "write_sio" ${CMAKE_BINARY_DIR}/tests/sio_io/example.sio) - CREATE_DUMP_TEST(podio-dump-sio "write_frame_sio" --entries 4:7 ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio) - CREATE_DUMP_TEST(podio-dump-detailed-sio "write_frame_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/sio_io/example_frame.sio) - CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${CMAKE_BINARY_DIR}/tests/sio_io/example.sio) + CREATE_DUMP_TEST(podio-dump-sio-legacy "write_sio" ${PROJECT_BINARY_DIR}/tests/sio_io/example.sio) + CREATE_DUMP_TEST(podio-dump-sio "write_frame_sio" --entries 4:7 ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio "write_frame_sio" --detailed --entries 9 ${PROJECT_BINARY_DIR}/tests/sio_io/example_frame.sio) + CREATE_DUMP_TEST(podio-dump-detailed-sio-legacy "write_sio" --detailed --entries 9 ${PROJECT_BINARY_DIR}/tests/sio_io/example.sio) endif() endif() From e0cb13569913b5d8a770f97f4ffa845776b66396 Mon Sep 17 00:00:00 2001 From: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> Date: Tue, 18 Jul 2023 11:34:59 +0200 Subject: [PATCH 093/100] Cache podio_PYTHON_DIR (#456) Co-authored-by: jmcarcell --- tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6e71fa2ee..ccb747007 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,7 @@ foreach( _conf ${CMAKE_CONFIGURATION_TYPES} ) endforeach() # Set the podio_PYTHON_DIR manually here because the macros below expect it -SET(podio_PYTHON_DIR ${PROJECT_SOURCE_DIR}/python) 
#ifndef PODIO_FRAMECATEGORIES_H
#define PODIO_FRAMECATEGORIES_H

#include <string>

namespace podio {

/**
 * Create a parameter name that encodes the collection name and the parameter
 * name into one string.
 *
 * This codifies a convention that was decided on to store collection level
 * parameters. These are parameters / metadata that are valid for all
 * collections of a given name in a file, e.g. CellID encoding strings. These
 * parameters are usually stored in a dedicated metadata Frame inside a file,
 * see the predefined category names in the Category namespace.
 *
 * @param collName the name of the collection
 * @param paramName the name of the parameter
 *
 * @returns A single key string that combines the collection and parameter
 *          name, joined by a double underscore ("<collName>__<paramName>")
 */
inline std::string collMetadataParamName(const std::string& collName, const std::string& paramName) {
  return collName + "__" + paramName;
}

/**
 * This namespace mimics an enum (at least in its usage) and simply defines
 * either commonly used category names, or category names that form a
 * convention.
 */
namespace Category {
  /// The event category
  constexpr const auto Event = "events";
  /// The run category
  constexpr const auto Run = "runs";
  /// The metadata category that is used to store a single Frame that holds data
  /// that is valid for a whole file, e.g. collection level parameters
  constexpr const auto Metadata = "metadata";
} // namespace Category
} // namespace podio

#endif
" << "(expected:" << 10 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; return 1; @@ -91,8 +91,8 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { // Read the frames in a different order than when writing them here to make // sure that the writing/reading order does not impose any usage requirements - for (size_t i = 0; i < reader.getEntries("events"); ++i) { - auto frame = podio::Frame(reader.readNextEntry("events")); + for (size_t i = 0; i < reader.getEntries(podio::Category::Event); ++i) { + auto frame = podio::Frame(reader.readNextEntry(podio::Category::Event)); if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { if (frame.get("emptySubsetColl") == nullptr) { std::cerr << "Could not retrieve an empty subset collection" << std::endl; @@ -114,7 +114,7 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { } } - if (reader.readNextEntry("events")) { + if (reader.readNextEntry(podio::Category::Event)) { std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl; return 1; } @@ -127,10 +127,10 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { // Reading specific (jumping to) entry { - auto frame = podio::Frame(reader.readEntry("events", 4)); + auto frame = podio::Frame(reader.readEntry(podio::Category::Event, 4)); processEvent(frame, 4, reader.currentFileVersion()); // Reading the next entry after jump, continues from after the jump - auto nextFrame = podio::Frame(reader.readNextEntry("events")); + auto nextFrame = podio::Frame(reader.readNextEntry(podio::Category::Event)); processEvent(nextFrame, 5, reader.currentFileVersion()); auto otherFrame = podio::Frame(reader.readEntry("other_events", 4)); @@ -147,7 +147,7 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { } // Trying to read a Frame that is not present returns a nullptr - if (reader.readEntry("events", 
10)) { + if (reader.readEntry(podio::Category::Event, 10)) { std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl; return 1; } diff --git a/tests/read_frame_auxiliary.h b/tests/read_frame_auxiliary.h index 8ae4ff1f7..e8dacbe89 100644 --- a/tests/read_frame_auxiliary.h +++ b/tests/read_frame_auxiliary.h @@ -56,9 +56,9 @@ int test_frame_aux_info(const std::string& fileName) { reader.openFile(fileName); // Test on the first event only here. Additionally, also only testing the - // "events" category, since that is the one where not all collections are + // events category, since that is the one where not all collections are // written - auto event = podio::Frame(reader.readEntry("events", 0)); + auto event = podio::Frame(reader.readEntry(podio::Category::Event, 0)); auto collsToRead = collsToWrite; if (reader.currentFileVersion() < podio::version::Version{0, 16, 3}) { diff --git a/tests/write_frame.h b/tests/write_frame.h index 8a0a2b3d3..e4405f988 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -424,7 +424,7 @@ void write_frames(const std::string& filename) { for (int i = 0; i < 10; ++i) { auto frame = makeFrame(i); - writer.writeFrame(frame, "events", collsToWrite); + writer.writeFrame(frame, podio::Category::Event, collsToWrite); } for (int i = 100; i < 110; ++i) { From 6b8eeb1824b0fb19f00b8d581b393282a5542be9 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Thu, 20 Jul 2023 14:07:26 +0200 Subject: [PATCH 095/100] Make building SIO tests depend on ENABLE_SIO (#457) --- tests/CMakeLists.txt | 4 ++- tests/sio_io/CMakeLists.txt | 54 +++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ccb747007..fe7451c9f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -54,7 +54,9 @@ add_executable(check_benchmark_outputs check_benchmark_outputs.cpp) target_link_libraries(check_benchmark_outputs PRIVATE 
ROOT::Tree) add_subdirectory(root_io) -add_subdirectory(sio_io) +if (ENABLE_SIO) + add_subdirectory(sio_io) +endif() add_subdirectory(unittests) add_subdirectory(dumpmodel) diff --git a/tests/sio_io/CMakeLists.txt b/tests/sio_io/CMakeLists.txt index ee9a80be7..6eecd6d90 100644 --- a/tests/sio_io/CMakeLists.txt +++ b/tests/sio_io/CMakeLists.txt @@ -1,32 +1,28 @@ -if (TARGET TestDataModelSioBlocks) - set(sio_dependent_tests - write_sio.cpp - read_sio.cpp - read_and_write_sio.cpp - write_timed_sio.cpp - read_timed_sio.cpp - read_frame_sio.cpp - write_frame_sio.cpp - read_frame_legacy_sio.cpp) - set(sio_libs podio::podioSioIO) - foreach( sourcefile ${sio_dependent_tests} ) - CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") - endforeach() +set(sio_dependent_tests + write_sio.cpp + read_sio.cpp + read_and_write_sio.cpp + write_timed_sio.cpp + read_timed_sio.cpp + read_frame_sio.cpp + write_frame_sio.cpp + read_frame_legacy_sio.cpp) +set(sio_libs podio::podioSioIO) +foreach( sourcefile ${sio_dependent_tests} ) + CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") +endforeach() - # These need to be linked against TTree explicitly, since it is not done - # through another library and the TimedReader/Writer decorators are - # header-only wrappers - target_link_libraries(write_timed_sio PRIVATE ROOT::Tree) - target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) -endif() +# These need to be linked against TTree explicitly, since it is not done +# through another library and the TimedReader/Writer decorators are +# header-only wrappers +target_link_libraries(write_timed_sio PRIVATE ROOT::Tree) +target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) -if (TARGET read_sio) - set_property(TEST read_sio PROPERTY DEPENDS write_sio) - set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) - set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) - set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) - set_property(TEST read_frame_legacy_sio PROPERTY 
DEPENDS write_sio) +set_property(TEST read_sio PROPERTY DEPENDS write_sio) +set_property(TEST read_and_write_sio PROPERTY DEPENDS write_sio) +set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) +set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) +set_property(TEST read_frame_legacy_sio PROPERTY DEPENDS write_sio) - add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) - set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) -endif() +add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) +set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) From 25e5e3aa92d784e37c7ade6306451a3e4439fcf5 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 25 Jul 2023 13:28:57 +0200 Subject: [PATCH 096/100] Make sure that v00-16-06 files can be read with the master branch (#461) * Add v00-16-06 test files * Make the reader read v00-16-06 files correctly * Bump patch version to 99 for development --- CMakeLists.txt | 2 +- src/ROOTFrameReader.cc | 2 +- tests/CMakeLists.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 35c521882..4f4ad10ae 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(podio) #--- Version ------------------------------------------------------------------- SET( ${PROJECT_NAME}_VERSION_MAJOR 0 ) SET( ${PROJECT_NAME}_VERSION_MINOR 16 ) -SET( ${PROJECT_NAME}_VERSION_PATCH 6 ) +SET( ${PROJECT_NAME}_VERSION_PATCH 99 ) SET( ${PROJECT_NAME}_VERSION "${${PROJECT_NAME}_VERSION_MAJOR}.${${PROJECT_NAME}_VERSION_MINOR}.${${PROJECT_NAME}_VERSION_PATCH}" ) diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index d7da9dfa9..f098ad204 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -155,7 +155,7 @@ void 
ROOTFrameReader::initCategory(CategoryInfo& catInfo, const std::string& cat // For backwards compatibility make it possible to read the index based files // from older versions - if (m_fileVersion <= podio::version::Version{0, 16, 5}) { + if (m_fileVersion <= podio::version::Version{0, 16, 6}) { std::tie(catInfo.branches, catInfo.storedClasses) = createCollectionBranchesIndexBased(catInfo.chain.get(), *catInfo.table, *collInfo); } else { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index fe7451c9f..bfd203fb4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -45,6 +45,7 @@ set(legacy_test_versions v00-16 v00-16-02 v00-16-05 + v00-16-06 ) ### Define the actual tests From 65b58eac132f5685591028b9d103ffbaad9ca196 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 25 Jul 2023 14:22:47 +0200 Subject: [PATCH 097/100] Add python bindings for writing Frames (#447) * Add put method to Frame python bindings * Add a test that writes a file via the python bindings and ROOT * Add a c++ test that reads content written in python * Add a put_parameter method to the python Frame --- python/podio/base_writer.py | 25 +++++ python/podio/frame.py | 114 +++++++++++++++++++++-- python/podio/root_io.py | 14 ++- python/podio/sio_io.py | 14 ++- python/podio/test_Frame.py | 59 ++++++++++++ python/podio/test_utils.py | 54 ++++++++++- tests/CTestCustom.cmake | 4 + tests/read_python_frame.h | 106 +++++++++++++++++++++ tests/root_io/CMakeLists.txt | 7 ++ tests/root_io/read_python_frame_root.cpp | 7 ++ tests/root_io/write_frame_root.py | 7 ++ tests/sio_io/read_python_frame_sio.cpp | 7 ++ tests/sio_io/write_frame_sio.py | 7 ++ 13 files changed, 411 insertions(+), 14 deletions(-) create mode 100644 python/podio/base_writer.py create mode 100644 tests/read_python_frame.h create mode 100644 tests/root_io/read_python_frame_root.cpp create mode 100644 tests/root_io/write_frame_root.py create mode 100644 tests/sio_io/read_python_frame_sio.cpp create mode 100644 
class BaseWriterMixin:
    """Mixin class that defines the base interface of the writers.

    The backend specific writers inherit from here and have to initialize the
    following members:
    - _writer: The actual writer that is able to write frames
    """

    def write_frame(self, frame, category, collections=None):
        """Write the given frame under the passed category, optionally limiting the
        collections that are written.

        Args:
            frame (podio.frame.Frame): The Frame to write
            category (str): The category name
            collections (optional, default=None): The subset of collections to
                write. If None, all collections are written. An explicitly
                passed empty sequence is forwarded to the writer unchanged
                instead of being replaced by all collections.
        """
        # Only fall back to "everything" when the caller did not make a choice;
        # a plain truthiness check would also swallow an explicit empty list.
        if collections is None:
            collections = frame.collections
        # pylint: disable-next=protected-access
        self._writer.writeFrame(frame._frame, category, collections)
the type_str (c++ or python) + types = _get_cpp_types(type_str) return [f'std::vector<{t}>' for t in map(lambda x: x[0], types)] +def _is_collection_base(thing): + """Check whether the passed thing is a podio::CollectionBase + + Args: + thing (any): any object + + Returns: + bool: True if thing is a base of podio::CollectionBase, False otherwise + """ + # Make sure to only instantiate the template with things that cppyy + # understands + if "cppyy" in repr(thing): + return cppyy.gbl.std.is_base_of[cppyy.gbl.podio.CollectionBase, type(thing)].value + return False + + class Frame: """Frame class that serves as a container of collection and meta data.""" @@ -78,17 +100,16 @@ def __init__(self, data=None): else: self._frame = podio.Frame() - self._collections = tuple(str(s) for s in self._frame.getAvailableCollections()) - self._param_key_types = self._init_param_keys() + self._param_key_types = self._get_param_keys_types() @property def collections(self): - """Get the available collection (names) from this Frame. + """Get the currently available collection (names) from this Frame. Returns: tuple(str): The names of the available collections from this Frame. """ - return self._collections + return tuple(str(s) for s in self._frame.getAvailableCollections()) def get(self, name): """Get a collection from the Frame by name. @@ -107,9 +128,32 @@ def get(self, name): raise KeyError(f"Collection '{name}' is not available") return collection + def put(self, collection, name): + """Put the collection into the frame + + The passed collectoin is "moved" into the Frame, i.e. it cannot be used any + longer after a call to this function. This also means that only objects that + were in the collection at the time of calling this function will be + available afterwards. 
def put_parameter(self, key, value, as_type=None):
    """Put a parameter into the Frame.

    Puts a parameter into the Frame after doing some (incomplete) type checks.
    If a list is passed the parameter type is determined from looking at the
    first element of the list only. Additionally, since python doesn't
    differentiate between floats and doubles, floats will always be stored as
    doubles by default, use the as_type argument to change this if necessary.

    Args:
        key (str): The name of the parameter
        value (int, float, str or list of these): The parameter value
        as_type (str, optional): Explicitly specify the type that should be used
            to put the parameter into the Frame. Python types (e.g. "str") will
            be converted to c++ types. This will override any automatic type
            deduction that happens otherwise. Note that this will be taken at
            pretty much face-value and there are only limited checks for this.

    Raises:
        ValueError: If a non-supported parameter type is passed, or if an
            empty list is passed without as_type (no element to deduce the
            type from)
    """
    # For lists we determine the c++ vector type and use that to call the
    # correct template overload explicitly
    if isinstance(value, (list, tuple)):
        if as_type is None and len(value) == 0:
            raise ValueError("Cannot deduce the parameter type of an empty list, "
                             "use as_type to specify it")

        type_name = as_type or type(value[0]).__name__
        vec_types = _get_cpp_vector_types(type_name)
        if len(vec_types) == 0:
            raise ValueError(f"Cannot put a parameter of type {type_name} into a Frame")

        par_type = vec_types[0]
        # Store python floats as doubles, but only when the type was deduced.
        # An explicitly requested type (e.g. as_type="float") must win,
        # otherwise the documented override would silently be ignored.
        if as_type is None and isinstance(value[0], float):
            par_type = par_type.replace("float", "double")

        self._frame.putParameter[par_type](key, value)
    else:
        if as_type is not None:
            cpp_types = _get_cpp_types(as_type)
            if len(cpp_types) == 0:
                raise ValueError(f"Cannot put a parameter of type {as_type} into a Frame")
            self._frame.putParameter[cpp_types[0]](key, value)

        # If we have a single integer, a std::string overload kicks in with higher
        # priority than the template for some reason. So we explicitly select the
        # correct template here
        elif isinstance(value, int):
            self._frame.putParameter["int"](key, value)
        else:
            self._frame.putParameter(key, value)

    self._param_key_types = self._get_param_keys_types()  # refresh the cache
class Writer(BaseWriterMixin):
    """Writer class for writing podio sio files"""

    def __init__(self, filename):
        """Create a writer for writing files

        Args:
            filename (str): The name of the output file
        """
        # The mixin's write_frame forwards to this backend specific writer
        self._writer = podio.SIOFrameWriter(filename)
def test_frame_put_parameters(self):
    """Check that putting a parameter works as expected"""
    frame = Frame()
    self.assertEqual(frame.parameters, tuple())

    frame.put_parameter("a_string_param", "a string")
    self.assertEqual(frame.parameters, tuple(["a_string_param"]))
    self.assertEqual(frame.get_parameter("a_string_param"), "a string")

    frame.put_parameter("float_param", 3.14)
    self.assertEqual(frame.get_parameter("float_param"), 3.14)

    frame.put_parameter("int", 42)
    self.assertEqual(frame.get_parameter("int"), 42)

    frame.put_parameter("string_vec", ["a", "b", "cd"])
    str_vec = frame.get_parameter("string_vec")
    self.assertEqual(len(str_vec), 3)
    self.assertEqual(str_vec, ["a", "b", "cd"])

    frame.put_parameter("more_ints", [1, 2345])
    int_vec = frame.get_parameter("more_ints")
    self.assertEqual(len(int_vec), 2)
    self.assertEqual(int_vec, [1, 2345])

    frame.put_parameter("float_vec", [1.23, 4.56, 7.89])
    vec = frame.get_parameter("float_vec", as_type="double")
    self.assertEqual(len(vec), 3)
    self.assertEqual(vec, [1.23, 4.56, 7.89])

    frame.put_parameter("real_float_vec", [1.23, 4.56, 7.89], as_type="float")
    f_vec = frame.get_parameter("real_float_vec", as_type="float")
    self.assertEqual(len(f_vec), 3)
    # Check the vector that was just read back (not the double vector from
    # above). The values are compared with a tolerance because they may have
    # been rounded to single precision, same as for float_as_float below.
    for actual, expected in zip(f_vec, [1.23, 4.56, 7.89]):
        self.assertAlmostEqual(actual, expected, places=5)

    frame.put_parameter("float_as_float", 3.14, as_type="float")
    self.assertAlmostEqual(frame.get_parameter("float_as_float"), 3.14, places=5)
frame.put_parameter("an_int", 42) + frame.put_parameter("some_floats", [1.23, 7.89, 3.14]) + frame.put_parameter("greetings", ["from", "python"]) + frame.put_parameter("real_float", 3.14, as_type="float") + frame.put_parameter("more_real_floats", [1.23, 4.56, 7.89], as_type="float") + + return frame + + +def write_file(writer_type, filename): + """Write a file using the given Writer type and put one Frame into it under + the events category + """ + writer = writer_type(filename) + event = create_frame() + writer.write_frame(event, "events") diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index b0e683f65..d4d05cd2a 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -22,6 +22,8 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read-legacy-files-root_v00-13 read_frame_legacy_root read_frame_root_multiple + write_python_frame_root + read_python_frame_root write_frame_root read_frame_root @@ -35,6 +37,8 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ write_frame_sio read_frame_sio read_frame_legacy_sio + write_python_frame_sio + read_python_frame_sio write_ascii diff --git a/tests/read_python_frame.h b/tests/read_python_frame.h new file mode 100644 index 000000000..5a06cc4ce --- /dev/null +++ b/tests/read_python_frame.h @@ -0,0 +1,106 @@ +#ifndef PODIO_TESTS_READ_PYTHON_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_READ_PYTHON_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/ExampleClusterCollection.h" +#include "datamodel/ExampleHitCollection.h" + +#include "podio/Frame.h" + +#include + +int checkHits(const ExampleHitCollection& hits) { + if (hits.size() != 2) { + std::cerr << "There should be two hits in the collection (actual size: " << hits.size() << ")" << std::endl; + return 1; + } + + auto hit1 = hits[0]; + if (hit1.cellID() != 0xbad || hit1.x() != 0.0 || hit1.y() != 0.0 || 
hit1.z() != 0.0 || hit1.energy() != 23.0) { + std::cerr << "Could not retrieve the correct hit[0]: (expected: " << ExampleHit(0xbad, 0.0, 0.0, 0.0, 23.0) + << ", actual: " << hit1 << ")" << std::endl; + return 1; + } + + auto hit2 = hits[1]; + if (hit2.cellID() != 0xcaffee || hit2.x() != 1.0 || hit2.y() != 0.0 || hit2.z() != 0.0 || hit2.energy() != 12.0) { + std::cerr << "Could not retrieve the correct hit[1]: (expected: " << ExampleHit(0xcaffee, 1.0, 0.0, 0.0, 12.0) + << ", actual: " << hit1 << ")" << std::endl; + return 1; + } + + return 0; +} + +int checkClusters(const ExampleClusterCollection& clusters) { + if (clusters.size() != 2) { + std::cerr << "There should be two clusters in the collection (actual size: " << clusters.size() << ")" << std::endl; + return 1; + } + + if (clusters[0].energy() != 3.14 || clusters[1].energy() != 1.23) { + std::cerr << "Energies of the clusters is wrong: (expected: 3.14 and 1.23, actual " << clusters[0].energy() + << " and " << clusters[1].energy() << ")" << std::endl; + return 1; + } + + return 0; +} + +template +std::ostream& operator<<(std::ostream& o, const std::vector& vec) { + auto delim = "["; + for (const auto& v : vec) { + o << std::exchange(delim, ", ") << v; + } + return o << "]"; +} + +int checkParameters(const podio::Frame& frame) { + const auto iVal = frame.getParameter("an_int"); + if (iVal != 42) { + std::cerr << "Parameter an_int was not stored correctly (expected 42, actual " << iVal << ")" << std::endl; + return 1; + } + + const auto& dVal = frame.getParameter>("some_floats"); + if (dVal.size() != 3 || dVal[0] != 1.23 || dVal[1] != 7.89 || dVal[2] != 3.14) { + std::cerr << "Parameter some_floats was not stored correctly (expected [1.23, 7.89, 3.14], actual " << dVal << ")" + << std::endl; + return 1; + } + + const auto& strVal = frame.getParameter>("greetings"); + if (strVal.size() != 2 || strVal[0] != "from" || strVal[1] != "python") { + std::cerr << "Parameter greetings was not stored correctly (expected 
[from, python], actual " << strVal << ")" + << std::endl; + return 1; + } + + const auto realFloat = frame.getParameter("real_float"); + if (realFloat != 3.14f) { + std::cerr << "Parameter real_float was not stored correctly (expected 3.14, actual " << realFloat << ")" + << std::endl; + return 1; + } + + const auto& realFloats = frame.getParameter>("more_real_floats"); + if (realFloats.size() != 3 || realFloats[0] != 1.23f || realFloats[1] != 4.56f || realFloats[2] != 7.89f) { + std::cerr << "Parameter more_real_floats was not stored as correctly (expected [1.23, 4.56, 7.89], actual" + << realFloats << ")" << std::endl; + } + + return 0; +} + +template +int read_frame(const std::string& filename) { + auto reader = ReaderT(); + reader.openFile(filename); + + auto event = podio::Frame(reader.readEntry("events", 0)); + + return checkHits(event.get("hits_from_python")) + + checkClusters(event.get("clusters_from_python")) + checkParameters(event); +} + +#endif // PODIO_TESTS_READ_PYTHON_FRAME_H diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index bfa8309f3..5c867a9fe 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -11,6 +11,7 @@ set(root_dependent_tests write_frame_root.cpp read_frame_legacy_root.cpp read_frame_root_multiple.cpp + read_python_frame_root.cpp ) if(ENABLE_RNTUPLE) set(root_dependent_tests @@ -69,3 +70,9 @@ if (DEFINED CACHE{PODIO_TEST_INPUT_DATA_DIR}) ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_root example.root legacy_test_cases) endforeach() endif() + +#--- Write via python and the ROOT backend and see if we can read it back in in +#--- c++ +add_test(NAME write_python_frame_root COMMAND python3 ${CMAKE_CURRENT_LIST_DIR}/write_frame_root.py) +PODIO_SET_TEST_ENV(write_python_frame_root) +set_property(TEST read_python_frame_root PROPERTY DEPENDS write_python_frame_root) diff --git a/tests/root_io/read_python_frame_root.cpp b/tests/root_io/read_python_frame_root.cpp new file mode 100644 index 
000000000..23d1c0015 --- /dev/null +++ b/tests/root_io/read_python_frame_root.cpp @@ -0,0 +1,7 @@ +#include "read_python_frame.h" + +#include "podio/ROOTFrameReader.h" + +int main() { + return read_frame("example_frame_with_py.root"); +} diff --git a/tests/root_io/write_frame_root.py b/tests/root_io/write_frame_root.py new file mode 100644 index 000000000..38bece171 --- /dev/null +++ b/tests/root_io/write_frame_root.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +"""Script to write a Frame in ROOT format""" + +from podio import test_utils +from podio.root_io import Writer + +test_utils.write_file(Writer, "example_frame_with_py.root") diff --git a/tests/sio_io/read_python_frame_sio.cpp b/tests/sio_io/read_python_frame_sio.cpp new file mode 100644 index 000000000..61c3eb481 --- /dev/null +++ b/tests/sio_io/read_python_frame_sio.cpp @@ -0,0 +1,7 @@ +#include "read_python_frame.h" + +#include "podio/SIOFrameReader.h" + +int main() { + return read_frame("example_frame_with_py.sio"); +} diff --git a/tests/sio_io/write_frame_sio.py b/tests/sio_io/write_frame_sio.py new file mode 100644 index 000000000..94e08aa27 --- /dev/null +++ b/tests/sio_io/write_frame_sio.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +"""Script to write a Frame in SIO format""" + +from podio import test_utils +from podio.sio_io import Writer + +test_utils.write_file(Writer, "example_frame_with_py.sio") From dbf9425aa6d2b956c4c6fa49d45abe40cbe9e5af Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 26 Jul 2023 08:38:29 +0200 Subject: [PATCH 098/100] Fix I/O issues of subset collections with vector members (#463) * Add tests that reproduce crash / failure * Only recast vector members for non-subset collections * Only read / write vector member buffers if they exist with SIO * Make pyton tests aware of new collection * [format] flake8 fixes * Fix typo in test output * Cleanup unittests * Actually populate an empty test section * Add a test case show-casing the failure * Make sure to only access vector 
members if available * Add tests read and write tests for SIO * Add I/O tests to ignore list for sanitizer builds --- python/podio/test_Frame.py | 5 ++- python/templates/Collection.cc.jinja2 | 3 +- python/templates/CollectionData.cc.jinja2 | 8 +++-- python/templates/SIOBlock.cc.jinja2 | 20 ++++++----- python/templates/macros/sioblocks.jinja2 | 18 +++++----- tests/CTestCustom.cmake | 2 ++ tests/read_and_write_frame.h | 38 +++++++++++++++++++++ tests/read_frame.h | 13 +++++++ tests/root_io/CMakeLists.txt | 13 +++++-- tests/root_io/read_and_write_frame_root.cpp | 9 +++++ tests/sio_io/CMakeLists.txt | 14 ++++++-- tests/sio_io/read_and_write_frame_sio.cpp | 9 +++++ tests/unittests/unittest.cpp | 27 +++++++-------- tests/write_frame.h | 12 ++++++- 14 files changed, 149 insertions(+), 42 deletions(-) create mode 100644 tests/read_and_write_frame.h create mode 100644 tests/root_io/read_and_write_frame_root.cpp create mode 100644 tests/sio_io/read_and_write_frame_sio.cpp diff --git a/python/podio/test_Frame.py b/python/podio/test_Frame.py index c09143056..7761deaf0 100644 --- a/python/podio/test_Frame.py +++ b/python/podio/test_Frame.py @@ -18,7 +18,10 @@ 'emptyCollection', 'emptySubsetColl' } # The expected collections from the extension (only present in the other_events category) -EXPECTED_EXTENSION_COLL_NAMES = {"extension_Contained", "extension_ExternalComponent", "extension_ExternalRelation"} +EXPECTED_EXTENSION_COLL_NAMES = { + "extension_Contained", "extension_ExternalComponent", "extension_ExternalRelation", + "VectorMemberSubsetColl" + } # The expected parameter names in each frame EXPECTED_PARAM_NAMES = {'anInt', 'UserEventWeight', 'UserEventName', 'SomeVectorData', 'SomeValue'} diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 99cbd01af..44b4c139d 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -198,14 +198,15 @@ podio::CollectionReadBuffers createBuffers(bool 
isSubset) { }; readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { + // We only have any of these buffers if this is not a subset collection if (buffers.data) { buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); - } {% if VectorMembers %} {% for member in VectorMembers %} (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); {% endfor %} {% endif %} + } }; return readBuffers; diff --git a/python/templates/CollectionData.cc.jinja2 b/python/templates/CollectionData.cc.jinja2 index 3946ad756..cfa143060 100644 --- a/python/templates/CollectionData.cc.jinja2 +++ b/python/templates/CollectionData.cc.jinja2 @@ -83,11 +83,13 @@ void {{ class_type }}::clear(bool isSubsetColl) { podio::CollectionWriteBuffers {{ class_type }}::getCollectionBuffers(bool isSubsetColl) { {% if VectorMembers %} - // Make sure these point to the right place, even if a collection has been - // moved since it has been created + if (!isSubsetColl) { + // Make sure these point to the right place, even if a collection has been + // moved since it has been created {% for member in VectorMembers %} - m_vecmem_info[{{ loop.index0 }}].second = &m_vec_{{ member.name }}; + m_vecmem_info[{{ loop.index0 }}].second = &m_vec_{{ member.name }}; {% endfor %} + } {% endif -%} return { diff --git a/python/templates/SIOBlock.cc.jinja2 b/python/templates/SIOBlock.cc.jinja2 index f8090ea75..4fc065b46 100644 --- a/python/templates/SIOBlock.cc.jinja2 +++ b/python/templates/SIOBlock.cc.jinja2 @@ -40,18 +40,18 @@ void {{ block_class }}::read(sio::read_device& device, sio::version_type version } {% if VectorMembers %} + if (not m_subsetColl) { {% for member in VectorMembers %} - // auto {{ member.name }}Buffers = new std::vector<{{ member.full_type }}>(); - // m_buffers.vectorMembers->emplace_back("{{ member.full_type }}", &{{ member.name }}Buffers); - 
m_buffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>()); + m_buffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>()); {% endfor %} - //---- read vector members - auto* vecMemInfo = m_buffers.vectorMembers; - unsigned size{0}; + //---- read vector members + auto* vecMemInfo = m_buffers.vectorMembers; + unsigned size{0}; {% for member in VectorMembers %} {{ macros.vector_member_read(member, loop.index0) }} {% endfor %} + } {% endif %} } @@ -72,13 +72,15 @@ void {{ block_class }}::write(sio::write_device& device) { } {% if VectorMembers %} - //---- write vector members - auto* vecMemInfo = m_buffers.vectorMembers; - unsigned size{0}; + if (not m_subsetColl) { + //---- write vector members + auto* vecMemInfo = m_buffers.vectorMembers; + unsigned size{0}; {% for member in VectorMembers %} {{ macros.vector_member_write(member, loop.index0) }} {% endfor %} + } {% endif %} } diff --git a/python/templates/macros/sioblocks.jinja2 b/python/templates/macros/sioblocks.jinja2 index b3b903f8b..e3893b142 100644 --- a/python/templates/macros/sioblocks.jinja2 +++ b/python/templates/macros/sioblocks.jinja2 @@ -1,16 +1,16 @@ {% macro vector_member_write(member, index) %} - auto* vec{{ index }} = *reinterpret_cast**>(vecMemInfo->at({{ index }}).second); - size = vec{{ index }}->size(); - device.data(size); - podio::handlePODDataSIO(device, &(*vec{{ index }})[0], size); + auto* vec{{ index }} = *reinterpret_cast**>(vecMemInfo->at({{ index }}).second); + size = vec{{ index }}->size(); + device.data(size); + podio::handlePODDataSIO(device, &(*vec{{ index }})[0], size); {% endmacro %} {% macro vector_member_read(member, index) %} - auto* vec{{ index }} = reinterpret_cast*>(vecMemInfo->at({{ index }}).second); - size = 0u; - device.data(size); - vec{{ index }}->resize(size); - podio::handlePODDataSIO(device, vec{{ index }}->data(), size); + auto* vec{{ index }} = 
reinterpret_cast*>(vecMemInfo->at({{ index }}).second); + size = 0u; + device.data(size); + vec{{ index }}->resize(size); + podio::handlePODDataSIO(device, vec{{ index }}->data(), size); {% endmacro %} diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index d4d05cd2a..59d9efa81 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -24,6 +24,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_frame_root_multiple write_python_frame_root read_python_frame_root + read_and_write_frame_root write_frame_root read_frame_root @@ -39,6 +40,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_frame_legacy_sio write_python_frame_sio read_python_frame_sio + read_and_write_frame_sio write_ascii diff --git a/tests/read_and_write_frame.h b/tests/read_and_write_frame.h new file mode 100644 index 000000000..b2d75e23c --- /dev/null +++ b/tests/read_and_write_frame.h @@ -0,0 +1,38 @@ +#ifndef PODIO_TESTS_READ_AND_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_READ_AND_WRITE_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "read_frame.h" + +#include + +template +int rewrite_frames(const std::string& inputFile, const std::string& newOutput) { + auto reader = ReaderT(); + reader.openFile(inputFile); + + auto writer = WriterT(newOutput); + + const auto frame = podio::Frame(reader.readEntry(podio::Category::Event, 0)); + writer.writeFrame(frame, podio::Category::Event); + + const auto otherFrame = podio::Frame(reader.readEntry("other_events", 0)); + writer.writeFrame(otherFrame, "other_events"); + + return 0; +} + +template +int read_rewritten_frames(const std::string& inputName) { + auto reader = ReaderT(); + reader.openFile(inputName); + + const auto frame = podio::Frame(reader.readEntry(podio::Category::Event, 0)); + processEvent(frame, 0, reader.currentFileVersion()); + + const auto otherFrame = 
podio::Frame(reader.readEntry("other_events", 0)); + processEvent(otherFrame, 100, reader.currentFileVersion()); + + return 0; +} + +#endif // PODIO_TESTS_READ_AND_WRITE_FRAME_H diff --git a/tests/read_frame.h b/tests/read_frame.h index 6ec6423d7..321623b96 100644 --- a/tests/read_frame.h +++ b/tests/read_frame.h @@ -1,6 +1,7 @@ #ifndef PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable #define PODIO_TESTS_READ_FRAME_H // NOLINT(llvm-header-guard): folder structure not suitable +#include "datamodel/ExampleWithVectorMemberCollection.h" #include "read_test.h" #include "extension_model/ContainedTypeCollection.h" @@ -60,6 +61,14 @@ void processExtensions(const podio::Frame& event, int iEvent, podio::version::Ve ASSERT(structs[2].y == 2 * iEvent, "struct value not as expected"); } +void checkVecMemSubsetColl(const podio::Frame& event) { + const auto& subsetColl = event.get("VectorMemberSubsetColl"); + const auto& origColl = event.get("WithVectorMember"); + ASSERT(subsetColl.isSubsetCollection(), "subset collection not read back as a subset collection"); + ASSERT(subsetColl.size() == 1, "subset collection should have size 1"); + ASSERT(subsetColl[0] == origColl[0], "subset coll does not have the right contents"); +} + template int read_frames(const std::string& filename, bool assertBuildVersion = true) { auto reader = ReaderT(); @@ -112,6 +121,10 @@ int read_frames(const std::string& filename, bool assertBuildVersion = true) { if (reader.currentFileVersion() > podio::version::Version{0, 16, 2}) { processExtensions(otherFrame, i + 100, reader.currentFileVersion()); } + // As well as a test for the vector members subset category + if (reader.currentFileVersion() >= podio::version::Version{0, 16, 99}) { + checkVecMemSubsetColl(otherFrame); + } } if (reader.readNextEntry(podio::Category::Event)) { diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 5c867a9fe..509749643 100644 --- a/tests/root_io/CMakeLists.txt +++ 
b/tests/root_io/CMakeLists.txt @@ -12,6 +12,7 @@ set(root_dependent_tests read_frame_legacy_root.cpp read_frame_root_multiple.cpp read_python_frame_root.cpp + read_and_write_frame_root.cpp ) if(ENABLE_RNTUPLE) set(root_dependent_tests @@ -33,8 +34,16 @@ set_property(TEST read-multiple PROPERTY DEPENDS write) set_property(TEST read_and_write PROPERTY DEPENDS write) set_property(TEST read_frame_legacy_root PROPERTY DEPENDS write) set_property(TEST read_timed PROPERTY DEPENDS write_timed) -set_property(TEST read_frame_root PROPERTY DEPENDS write_frame_root) -set_property(TEST read_frame_root_multiple PROPERTY DEPENDS write_frame_root) + +set_tests_properties( + read_frame_root + read_frame_root_multiple + read_and_write_frame_root + + PROPERTIES + DEPENDS write_frame_root +) + if(ENABLE_RNTUPLE) set_property(TEST read_rntuple PROPERTY DEPENDS write_rntuple) endif() diff --git a/tests/root_io/read_and_write_frame_root.cpp b/tests/root_io/read_and_write_frame_root.cpp new file mode 100644 index 000000000..c25840289 --- /dev/null +++ b/tests/root_io/read_and_write_frame_root.cpp @@ -0,0 +1,9 @@ +#include "read_and_write_frame.h" + +#include "podio/ROOTFrameReader.h" +#include "podio/ROOTFrameWriter.h" + +int main() { + return rewrite_frames("example_frame.root", "rewritten_frame.root") + + read_rewritten_frames("rewritten_frame.root"); +} diff --git a/tests/sio_io/CMakeLists.txt b/tests/sio_io/CMakeLists.txt index 6eecd6d90..c5a15b34f 100644 --- a/tests/sio_io/CMakeLists.txt +++ b/tests/sio_io/CMakeLists.txt @@ -6,7 +6,9 @@ set(sio_dependent_tests read_timed_sio.cpp read_frame_sio.cpp write_frame_sio.cpp - read_frame_legacy_sio.cpp) + read_frame_legacy_sio.cpp + read_and_write_frame_sio.cpp +) set(sio_libs podio::podioSioIO) foreach( sourcefile ${sio_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${sio_libs}") @@ -21,8 +23,16 @@ target_link_libraries(read_timed_sio PRIVATE ROOT::Tree) set_property(TEST read_sio PROPERTY DEPENDS write_sio) set_property(TEST 
read_and_write_sio PROPERTY DEPENDS write_sio) set_property(TEST read_timed_sio PROPERTY DEPENDS write_timed_sio) -set_property(TEST read_frame_sio PROPERTY DEPENDS write_frame_sio) set_property(TEST read_frame_legacy_sio PROPERTY DEPENDS write_sio) +set_tests_properties( + read_frame_sio + read_and_write_frame_sio + + PROPERTIES + DEPENDS + write_frame_sio +) + add_test(NAME check_benchmark_outputs_sio COMMAND check_benchmark_outputs write_benchmark_sio.root read_benchmark_sio.root) set_property(TEST check_benchmark_outputs_sio PROPERTY DEPENDS read_timed_sio write_timed_sio) diff --git a/tests/sio_io/read_and_write_frame_sio.cpp b/tests/sio_io/read_and_write_frame_sio.cpp new file mode 100644 index 000000000..27a2ae76c --- /dev/null +++ b/tests/sio_io/read_and_write_frame_sio.cpp @@ -0,0 +1,9 @@ +#include "read_and_write_frame.h" + +#include "podio/SIOFrameReader.h" +#include "podio/SIOFrameWriter.h" + +int main() { + return rewrite_frames("example_frame.sio", "rewritten_frame.sio") + + read_rewritten_frames("rewritten_frame.sio"); +} diff --git a/tests/unittests/unittest.cpp b/tests/unittests/unittest.cpp index 6903a2e2b..cc87410f7 100644 --- a/tests/unittests/unittest.cpp +++ b/tests/unittests/unittest.cpp @@ -773,20 +773,6 @@ TEST_CASE("Move-only collections", "[collections][move-semantics]") { auto newClusters = std::move(clusterColl); vecMemColl.prepareForWrite(); - auto buffers = vecMemColl.getBuffers(); - auto vecBuffers = buffers.vectorMembers; - auto thisVec = (*vecBuffers)[0].second; - - const auto floatVec = podio::CollectionWriteBuffers::asVector(thisVec); - const auto floatVec2 = podio::CollectionReadBuffers::asVector(thisVec); - - std::cout << floatVec->size() << '\n'; - std::cout << floatVec2->size() << '\n'; - - // auto vecBuffers = buffers.vectorMembers; - // const auto vecBuffer = podio::CollectionWriteBuffers::asVector((*vecBuffers)[0].second); - // TD td; - // REQUIRE(vecBuffer->size() == 2); auto newVecMems = std::move(vecMemColl); 
userDataColl.prepareForWrite(); @@ -796,6 +782,19 @@ TEST_CASE("Move-only collections", "[collections][move-semantics]") { } SECTION("Moved collections can be prepared") { + auto newHits = std::move(hitColl); + newHits.prepareForWrite(); + + auto newClusters = std::move(clusterColl); + newClusters.prepareForWrite(); + + auto newVecMems = std::move(vecMemColl); + newVecMems.prepareForWrite(); + + auto newUserData = std::move(userDataColl); + newUserData.prepareForWrite(); + + checkCollections(newHits, newClusters, newVecMems, newUserData); } SECTION("Prepared collections can be move assigned") { diff --git a/tests/write_frame.h b/tests/write_frame.h index e4405f988..d80871d2e 100644 --- a/tests/write_frame.h +++ b/tests/write_frame.h @@ -215,6 +215,15 @@ auto createVectorMemberCollection(int i) { return vecs; } +auto createVectorMemberSubsetCollection(const ExampleWithVectorMemberCollection& coll) { + ExampleWithVectorMemberCollection refs; + refs.setSubsetCollection(); + + refs.push_back(coll[0]); + + return refs; +} + auto createInfoCollection(int i) { EventInfoCollection info; @@ -361,7 +370,8 @@ podio::Frame makeFrame(int iFrame) { podio::Frame frame{}; frame.put(createArrayCollection(iFrame), "arrays"); - frame.put(createVectorMemberCollection(iFrame), "WithVectorMember"); + const auto& vecMemColl = frame.put(createVectorMemberCollection(iFrame), "WithVectorMember"); + frame.put(createVectorMemberSubsetCollection(vecMemColl), "VectorMemberSubsetColl"); frame.put(createInfoCollection(iFrame), "info"); frame.put(createFixedWidthCollection(), "fixedWidthInts"); From 7807b15b4eaf8dca43fdc763465f7f3ac4df8052 Mon Sep 17 00:00:00 2001 From: Ananya2003Gupta Date: Thu, 17 Aug 2023 14:59:11 +0530 Subject: [PATCH 099/100] Rebased with master branch --- python/podio/test_MemberParser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/podio/test_MemberParser.py b/python/podio/test_MemberParser.py index 43f4a0c12..220a6fb73 100644 --- 
a/python/podio/test_MemberParser.py +++ b/python/podio/test_MemberParser.py @@ -1,3 +1,4 @@ + #!/usr/bin/env python """ Tests for the MemberParser highlighting what it is currently capable of and @@ -6,7 +7,7 @@ import unittest -from podio_config_reader import MemberParser, DefinitionError +from podio.podio_config_reader import MemberParser, DefinitionError class MemberParserTest(unittest.TestCase): @@ -216,6 +217,7 @@ def test_parse_invalid(self): try: self.assertRaises(DefinitionError, parser.parse, inp) except AssertionError: + # pylint: disable-next=raise-missing-from raise AssertionError(f"'{inp}' should raise a DefinitionError from the MemberParser") def test_parse_valid_no_description(self): From cb94c5b21a46409db8263163ff12ee2ce8dd184c Mon Sep 17 00:00:00 2001 From: Ananya2003Gupta Date: Thu, 17 Aug 2023 15:19:01 +0530 Subject: [PATCH 100/100] Fixed rebase conflict --- python/podio_class_generator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index c089fb87d..3764b0249 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -150,6 +150,7 @@ def process(self): self._process_datatype(name, datatype) self._write_edm_def_file() + self._get_namespace_dict() if 'ROOT' in self.io_handlers: self._create_selection_xml()