From 0b57a4d6e7dec4543943a19bab172c4eba62eae3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 3 Apr 2023 17:04:56 +0200 Subject: [PATCH 01/31] Remove unused fields --- include/podio/CollectionBuffers.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 8846af162..6d4e797d9 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -43,9 +43,7 @@ struct CollectionWriteBuffers { }; struct CollectionReadBuffers { - bool needsSchemaEvolution{false}; void* data{nullptr}; - void* data_oldschema{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; From 074a0e5c8cac7c4efbde166014a2c302bbdcd085 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 3 May 2023 16:47:44 +0200 Subject: [PATCH 02/31] Add SchemaEvolution singleton to hold evolution functions --- include/podio/CollectionBuffers.h | 1 + include/podio/Frame.h | 13 +++++- include/podio/SchemaEvolution.h | 50 +++++++++++++++++++++- src/CMakeLists.txt | 1 + src/SchemaEvolution.cc | 69 +++++++++++++++++++++++++++++++ 5 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 src/SchemaEvolution.cc diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 6d4e797d9..20e79ed20 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -46,6 +46,7 @@ struct CollectionReadBuffers { void* data{nullptr}; CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; + SchemaVersionT schemaVersion{0}; using CreateFuncT = std::function(podio::CollectionReadBuffers, bool)>; using RecastFuncT = std::function; diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 56ffe9cc2..92b9d4f20 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -2,9 +2,11 @@ #define PODIO_FRAME_H #include "podio/CollectionBase.h" +#include "podio/CollectionBufferFactory.h" #include "podio/CollectionIDTable.h" #include "podio/GenericParameters.h" #include "podio/ICollectionProvider.h" +#include "podio/SchemaEvolution.h" #include "podio/utilities/TypeHelpers.h" #include @@ -366,7 +368,16 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n buffers = unpack(m_data.get(), name); } if (buffers) { - auto coll = buffers->createCollection(buffers.value(), buffers->data == nullptr); + std::unique_ptr coll{nullptr}; + // Subset collections do not need schema evolution (by definition) + if (buffers->data == nullptr) { + coll = buffers->createCollection(buffers.value(), true); + } else { + auto evolvedBuffers = podio::SchemaEvolution::instance().evolveBuffers(buffers.value(), buffers->schemaVersion, + /*todo*/ "collType"); + coll = evolvedBuffers.createCollection(evolvedBuffers, false); + } + coll->prepareAfterRead(); coll->setID(m_idTable.collectionID(name)); { diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index fd77fddb6..d9f7db041 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -2,6 +2,10 @@ #define PODIO_SCHEMAEVOLUTION_H #include +#include +#include +#include +#include namespace podio { @@ -9,6 +13,50 @@ enum class Backend { ROOT, SIO }; using SchemaVersionT = uint32_t; +struct CollectionReadBuffers; + +class SchemaEvolution { + using EvolutionFuncT = std::function; + using VersionMapT = std::vector; + using EvolutionMapT = std::unordered_map; + +public: + /** + * Enum to make it possible to prioritize evolution functions during + * registration, making AutoGenerated lower priority than UserDefined + */ + enum class Priority { AutoGenerated = 0, UserDefined = 1 }; + + SchemaEvolution(const SchemaEvolution&) = delete; + SchemaEvolution& operator=(const SchemaEvolution&) = delete; + SchemaEvolution(SchemaEvolution&&) = delete; + SchemaEvolution& operator=(SchemaEvolution&&) = delete; + + ~SchemaEvolution() = default; + + static SchemaEvolution& mutInstance(); + static SchemaEvolution const& instance(); + + podio::CollectionReadBuffers evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, + const std::string& collType) const; + + /** + * Register an evoution function. Make the priority UserDefine dy default, so + * that users do not have to add that argument, and instead make code + * generation put the AutoGenerated value there + * + * NOTE: Currently necessary to fill the auto generated ones in the correct + * order of fromVersion (becoming larger). + */ + void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, + const EvolutionFuncT& evolutionFunc, Priority priority = Priority::UserDefined); + +private: + SchemaEvolution() = default; + + EvolutionMapT m_evolutionFuncs{}; +}; + } // namespace podio -#endif \ No newline at end of file +#endif // PODIO_SCHEMAEVOLUTION_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7f71864e3..394f23057 100755 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -54,6 +54,7 @@ SET(core_sources UserDataCollection.cc CollectionBufferFactory.cc MurmurHash3.cpp + SchemaEvolution.cc ) SET(core_headers diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc new file mode 100644 index 000000000..5c35b6262 --- /dev/null +++ b/src/SchemaEvolution.cc @@ -0,0 +1,69 @@ +#include "podio/SchemaEvolution.h" +#include "podio/CollectionBuffers.h" + +#include + +namespace podio { + +SchemaEvolution& SchemaEvolution::mutInstance() { + static SchemaEvolution instance; + return instance; +} + +SchemaEvolution const& SchemaEvolution::instance() { + return mutInstance(); +} + +podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionReadBuffers oldBuffers, + SchemaVersionT fromVersion, + const std::string& collType) const { + // TODO: where do we get the current version from? + // if (fromVersion == currentVersion) { + // return oldBuffers; + // } + + if (const auto typeIt = m_evolutionFuncs.find(collType); typeIt != m_evolutionFuncs.end()) { + const auto& versionMap = typeIt->second; + if (versionMap.size() >= fromVersion) { + return versionMap[fromVersion - 1](oldBuffers, fromVersion); + } + } + + // TODO: Warning for this + return oldBuffers; +} + +void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, + const EvolutionFuncT& evolutionFunc, Priority priority) { + auto typeIt = m_evolutionFuncs.find(collType); + if (typeIt != m_evolutionFuncs.end()) { + auto& versionMap = typeIt->second; + + const auto prevSize = versionMap.size(); + if (prevSize < fromVersion) { + versionMap.resize(fromVersion); + for (auto i = prevSize; i < fromVersion; ++i) { + versionMap[i] = evolutionFunc; + } + } else { + if (priority == Priority::UserDefined) { + versionMap[fromVersion] = evolutionFunc; + } else { + std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + } + } + + } else { + // We do not know about this type yet, so we create an entirely new map + // and populate all versions with this evolution function + VersionMapT versionMap; + versionMap.reserve(fromVersion); + for (size_t i = 0; i < fromVersion; ++i) { + versionMap.emplace_back(evolutionFunc); + } + + m_evolutionFuncs.emplace(collType, std::move(versionMap)); + } +} + +} // namespace podio From df470ed20ecb17cfd22a2760d6243d94c622a3a4 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Mon, 8 May 2023 17:33:38 -0400 Subject: [PATCH 03/31] Inject type information into collection buffers --- include/podio/CollectionBuffers.h | 6 ++++-- include/podio/Frame.h | 2 +- include/podio/SchemaEvolution.h | 1 + python/templates/Collection.cc.jinja2 | 1 + src/UserDataCollection.cc | 5 ++++- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index 20e79ed20..bde5394d8 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -47,15 +47,17 @@ struct CollectionReadBuffers { CollRefCollection* references{nullptr}; VectorMembersInfo* vectorMembers{nullptr}; SchemaVersionT schemaVersion{0}; + std::string_view type{}; using CreateFuncT = std::function(podio::CollectionReadBuffers, bool)>; using RecastFuncT = std::function; - CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, CreateFuncT&& createFunc, - RecastFuncT&& recastFunc) : + CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, std::string_view typ, + CreateFuncT&& createFunc, RecastFuncT&& recastFunc) : data(d), references(ref), vectorMembers(vec), + type(typ), createCollection(std::move(createFunc)), recast(std::move(recastFunc)) { } diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 92b9d4f20..a71bba336 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -374,7 +374,7 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n coll = buffers->createCollection(buffers.value(), true); } else { auto evolvedBuffers = podio::SchemaEvolution::instance().evolveBuffers(buffers.value(), buffers->schemaVersion, - /*todo*/ "collType"); + std::string(buffers->type)); coll = evolvedBuffers.createCollection(evolvedBuffers, false); } diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index d9f7db041..bdd277246 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -37,6 +37,7 @@ class SchemaEvolution { static SchemaEvolution& mutInstance(); static SchemaEvolution const& instance(); + // TODO: Make this take a string_view podio::CollectionReadBuffers evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const; diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 1789a83f6..84e747e8f 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -170,6 +170,7 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { namespace { podio::CollectionReadBuffers createBuffers(bool isSubset) { auto readBuffers = podio::CollectionReadBuffers{}; + readBuffers.type = "{{ class.full_type }}Collection"; readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; // The number of ObjectID vectors is either 1 or the sum of OneToMany and diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index 71ea34d48..e30c89e95 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -19,7 +19,10 @@ namespace { // Register with schema version 1 to allow for potential changes CollectionBufferFactory::mutInstance().registerCreationFunc( userDataCollTypeName(), UserDataCollection::schemaVersion, [](bool) { - return podio::CollectionReadBuffers{new std::vector(), nullptr, nullptr, + return podio::CollectionReadBuffers{new std::vector(), + nullptr, + nullptr, + podio::userDataCollTypeName(), [](podio::CollectionReadBuffers buffers, bool) { return std::make_unique>( std::move(*buffers.dataAsVector())); From 7fee6f2eb811f8227df5a66bbc67364b6bbdb443 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 9 May 2023 09:50:19 -0400 Subject: [PATCH 04/31] Inject current schema version into buffers from buffer factory --- include/podio/CollectionBuffers.h | 5 +++-- python/templates/Collection.cc.jinja2 | 1 + src/UserDataCollection.cc | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/podio/CollectionBuffers.h b/include/podio/CollectionBuffers.h index bde5394d8..37ee07fe4 100644 --- a/include/podio/CollectionBuffers.h +++ b/include/podio/CollectionBuffers.h @@ -52,11 +52,12 @@ struct CollectionReadBuffers { using CreateFuncT = std::function(podio::CollectionReadBuffers, bool)>; using RecastFuncT = std::function; - CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, std::string_view typ, - CreateFuncT&& createFunc, RecastFuncT&& recastFunc) : + CollectionReadBuffers(void* d, CollRefCollection* ref, VectorMembersInfo* vec, SchemaVersionT version, + std::string_view typ, CreateFuncT&& createFunc, RecastFuncT&& recastFunc) : data(d), references(ref), vectorMembers(vec), + schemaVersion(version), type(typ), createCollection(std::move(createFunc)), recast(std::move(recastFunc)) { diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 84e747e8f..3a7615d83 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -171,6 +171,7 @@ namespace { podio::CollectionReadBuffers createBuffers(bool isSubset) { auto readBuffers = podio::CollectionReadBuffers{}; readBuffers.type = "{{ class.full_type }}Collection"; + readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; // The number of ObjectID vectors is either 1 or the sum of OneToMany and diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index e30c89e95..8763d13a1 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -22,6 +22,7 @@ namespace { return podio::CollectionReadBuffers{new std::vector(), nullptr, nullptr, + podio::UserDataCollection::schemaVersion, podio::userDataCollTypeName(), [](podio::CollectionReadBuffers buffers, bool) { return std::make_unique>( From 2ba52f8dcc0e0e9b14ca59595cee8fe40321a7b8 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Mon, 8 May 2023 18:40:42 -0400 Subject: [PATCH 05/31] [wip] Start populating SchemaEvolution registry --- include/podio/SchemaEvolution.h | 12 +++++++++++ src/SchemaEvolution.cc | 35 +++++++++++++++++++++++++++------ src/UserDataCollection.cc | 4 ++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index bdd277246..2e5682e91 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -20,6 +20,11 @@ class SchemaEvolution { using VersionMapT = std::vector; using EvolutionMapT = std::unordered_map; + // Tag struct for marking collection types that do not need schema evolution. + // Necessary due to the fact that we determine the current version from the + // number of schema evolution functions we have + struct NoSchemaEvolutionNecessaryT {}; + public: /** * Enum to make it possible to prioritize evolution functions during @@ -27,6 +32,8 @@ class SchemaEvolution { */ enum class Priority { AutoGenerated = 0, UserDefined = 1 }; + static constexpr auto NoSchemaEvolutionNecessary = NoSchemaEvolutionNecessaryT{}; + SchemaEvolution(const SchemaEvolution&) = delete; SchemaEvolution& operator=(const SchemaEvolution&) = delete; SchemaEvolution(SchemaEvolution&&) = delete; @@ -48,10 +55,15 @@ class SchemaEvolution { * * NOTE: Currently necessary to fill the auto generated ones in the correct * order of fromVersion (becoming larger). + * + * NOTE: The number of available evolution functions for a given type defines + * the current version! Make sure to populate this properly */ void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, const EvolutionFuncT& evolutionFunc, Priority priority = Priority::UserDefined); + void registerEvolutionFunc(const std::string& collType, NoSchemaEvolutionNecessaryT); + private: SchemaEvolution() = default; diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 5c35b6262..f0fdbccaf 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -17,19 +17,31 @@ SchemaEvolution const& SchemaEvolution::instance() { podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const { - // TODO: where do we get the current version from? - // if (fromVersion == currentVersion) { - // return oldBuffers; - // } if (const auto typeIt = m_evolutionFuncs.find(collType); typeIt != m_evolutionFuncs.end()) { const auto& versionMap = typeIt->second; - if (versionMap.size() >= fromVersion) { + // The current schema version is defined by the number of evolution functions we have + const auto currentVersion = versionMap.size(); + std::cerr << "PODIO WARNING: evolveBuffers " << collType << " current " << currentVersion << " buffer version " + << fromVersion << std::endl; + + if (currentVersion == fromVersion) { + return oldBuffers; // Nothing to do here + } + + if (fromVersion < currentVersion) { return versionMap[fromVersion - 1](oldBuffers, fromVersion); } + + std::cerr + << "PODIO WARNING: evolveBuffers called with fromVersion that is greater than the current schema version for " + << collType << std::endl; + // TODO: exception? + return oldBuffers; } - // TODO: Warning for this + std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << std::endl; + // TODO: exception return oldBuffers; } @@ -66,4 +78,15 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV } } +void SchemaEvolution::registerEvolutionFunc(const std::string& collType, NoSchemaEvolutionNecessaryT) { + auto typeIt = m_evolutionFuncs.find(collType); + if (typeIt != m_evolutionFuncs.end()) { + std::runtime_error( + "Cannot mark a type for not needing schema evolution, if it already has schema evolution functions defined"); + } + + // TODO: How to guard this agains accidental overwriting later? + m_evolutionFuncs.emplace(collType, VersionMapT{}); +} + } // namespace podio diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index 8763d13a1..811e0d994 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -1,6 +1,7 @@ #include "podio/UserDataCollection.h" #include "podio/CollectionBufferFactory.h" #include "podio/CollectionBuffers.h" +#include "podio/SchemaEvolution.h" #include #include @@ -33,6 +34,9 @@ namespace { }}; }); + podio::SchemaEvolution::mutInstance().registerEvolutionFunc(podio::userDataCollTypeName(), + podio::SchemaEvolution::NoSchemaEvolutionNecessary); + return 1; } From 8942b3dfc063f793160dea08b555b5909206412f Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 9 May 2023 12:18:01 -0400 Subject: [PATCH 06/31] [wip] Split registration into two steps Easier this way to get the current version information into the whole system --- include/podio/SchemaEvolution.h | 24 ++++++-- python/templates/Collection.cc.jinja2 | 5 ++ src/SchemaEvolution.cc | 86 ++++++++++++--------------- src/UserDataCollection.cc | 4 +- 4 files changed, 64 insertions(+), 55 deletions(-) diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index 2e5682e91..321c95565 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -17,8 +17,21 @@ struct CollectionReadBuffers; class SchemaEvolution { using EvolutionFuncT = std::function; - using VersionMapT = std::vector; - using EvolutionMapT = std::unordered_map; + using EvolFuncVersionMapT = std::vector; + + // Helper struct combining the current schema version of each type and an + // index into the schema evolution "map" below + struct MapIndex { + SchemaVersionT currentVersion; + size_t index; + constexpr static size_t NoEvolutionAvailable = -1u; + }; + + // The map that holds the current version for each type that is known to the + // registry + using VersionMapT = std::unordered_map; + // The "map" that holds all evolution functions + using EvolutionMapT = std::vector; // Tag struct for marking collection types that do not need schema evolution. // Necessary due to the fact that we determine the current version from the @@ -55,18 +68,19 @@ class SchemaEvolution { * * NOTE: Currently necessary to fill the auto generated ones in the correct * order of fromVersion (becoming larger). - * - * NOTE: The number of available evolution functions for a given type defines - * the current version! Make sure to populate this properly */ void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, const EvolutionFuncT& evolutionFunc, Priority priority = Priority::UserDefined); void registerEvolutionFunc(const std::string& collType, NoSchemaEvolutionNecessaryT); + // Register the current version + void registerCurrentVersion(const std::string& collType, SchemaVersionT currentVersion); + private: SchemaEvolution() = default; + VersionMapT m_versionMapIndices{}; EvolutionMapT m_evolutionFuncs{}; }; diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 3a7615d83..c3f8d5e79 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -4,6 +4,7 @@ // AUTOMATICALLY GENERATED FILE - DO NOT EDIT #include "podio/CollectionBufferFactory.h" +#include "podio/SchemaEvolution.h" #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" @@ -216,6 +217,10 @@ bool registerCollection() { const static auto reg = []() { auto& factory = podio::CollectionBufferFactory::mutInstance(); factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + + auto& schemaEvolution = podio::SchemaEvolution::mutInstance(); + schemaEvolution.registerCurrentVersion("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion); + return true; }(); return reg; diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index f0fdbccaf..cae328a01 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -17,27 +17,18 @@ SchemaEvolution const& SchemaEvolution::instance() { podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const { - - if (const auto typeIt = m_evolutionFuncs.find(collType); typeIt != m_evolutionFuncs.end()) { - const auto& versionMap = typeIt->second; - // The current schema version is defined by the number of evolution functions we have - const auto currentVersion = versionMap.size(); - std::cerr << "PODIO WARNING: evolveBuffers " << collType << " current " << currentVersion << " buffer version " - << fromVersion << std::endl; - - if (currentVersion == fromVersion) { + if (const auto typeIt = m_versionMapIndices.find(collType); typeIt != m_versionMapIndices.end()) { + const auto [currentVersion, mapIndex] = typeIt->second; + if (fromVersion == currentVersion) { return oldBuffers; // Nothing to do here } - if (fromVersion < currentVersion) { - return versionMap[fromVersion - 1](oldBuffers, fromVersion); + const auto& typeEvolFuncs = m_evolutionFuncs[mapIndex]; + if (fromVersion < typeEvolFuncs.size() - 1) { + // Do we need this check? In principle we could ensure at registration + // time that this is always guaranteed + return typeEvolFuncs[fromVersion - 1](oldBuffers, fromVersion); } - - std::cerr - << "PODIO WARNING: evolveBuffers called with fromVersion that is greater than the current schema version for " - << collType << std::endl; - // TODO: exception? - return oldBuffers; } std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << std::endl; @@ -47,46 +38,45 @@ podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionRea void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, const EvolutionFuncT& evolutionFunc, Priority priority) { - auto typeIt = m_evolutionFuncs.find(collType); - if (typeIt != m_evolutionFuncs.end()) { - auto& versionMap = typeIt->second; + auto typeIt = m_versionMapIndices.find(collType); + if (typeIt == m_versionMapIndices.end()) { + std::cerr << "PODIO ERROR: trying to register a schema evolution function for " << collType + << " which is not a type known to the schema evolution registry" << std::endl; + return; + } - const auto prevSize = versionMap.size(); - if (prevSize < fromVersion) { - versionMap.resize(fromVersion); - for (auto i = prevSize; i < fromVersion; ++i) { - versionMap[i] = evolutionFunc; - } - } else { - if (priority == Priority::UserDefined) { - versionMap[fromVersion] = evolutionFunc; - } else { - std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; - } - } + auto& [currentVersion, mapIndex] = typeIt->second; + // If we do not have any evolution funcs yet, create the necessary mapping + // structure and update the index + if (typeIt->second.index == MapIndex::NoEvolutionAvailable) { + mapIndex = m_evolutionFuncs.size(); + m_evolutionFuncs.emplace_back(EvolFuncVersionMapT{}); + } + + auto& versionMap = m_evolutionFuncs[mapIndex]; + const auto prevSize = versionMap.size(); + if (prevSize < fromVersion) { + versionMap.resize(fromVersion); + for (auto i = prevSize; i < fromVersion; ++i) { + versionMap[i] = evolutionFunc; + } } else { - // We do not know about this type yet, so we create an entirely new map - // and populate all versions with this evolution function - VersionMapT versionMap; - versionMap.reserve(fromVersion); - for (size_t i = 0; i < fromVersion; ++i) { - versionMap.emplace_back(evolutionFunc); + if (priority == Priority::UserDefined) { + versionMap[fromVersion] = evolutionFunc; + } else { + std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; } - - m_evolutionFuncs.emplace(collType, std::move(versionMap)); } } -void SchemaEvolution::registerEvolutionFunc(const std::string& collType, NoSchemaEvolutionNecessaryT) { - auto typeIt = m_evolutionFuncs.find(collType); - if (typeIt != m_evolutionFuncs.end()) { - std::runtime_error( - "Cannot mark a type for not needing schema evolution, if it already has schema evolution functions defined"); +void SchemaEvolution::registerCurrentVersion(const std::string& collType, SchemaVersionT currentVersion) { + if (auto typeIt = m_versionMapIndices.find(collType); typeIt != m_versionMapIndices.end()) { + // TODO: warn about this? In principle all of this should only be called once + typeIt->second.currentVersion = currentVersion; } - // TODO: How to guard this agains accidental overwriting later? - m_evolutionFuncs.emplace(collType, VersionMapT{}); + m_versionMapIndices.emplace(collType, MapIndex{currentVersion, MapIndex::NoEvolutionAvailable}); } } // namespace podio diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index 811e0d994..52b69ae49 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -34,8 +34,8 @@ namespace { }}; }); - podio::SchemaEvolution::mutInstance().registerEvolutionFunc(podio::userDataCollTypeName(), - podio::SchemaEvolution::NoSchemaEvolutionNecessary); + podio::SchemaEvolution::mutInstance().registerCurrentVersion(podio::userDataCollTypeName(), + UserDataCollection::schemaVersion); return 1; } From 60f24d3878ab53da96181cc6a40ae7a6aa3aaaf4 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 11 May 2023 09:35:01 -0400 Subject: [PATCH 07/31] [wip] Require registration of each evolution function Remove two step registration again --- include/podio/SchemaEvolution.h | 111 ++++++++++++++++++++------ python/templates/Collection.cc.jinja2 | 3 - src/SchemaEvolution.cc | 32 ++++---- src/UserDataCollection.cc | 7 +- 4 files changed, 104 insertions(+), 49 deletions(-) diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index 321c95565..37a97955d 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -11,33 +11,49 @@ namespace podio { enum class Backend { ROOT, SIO }; +/// The type used for schema versions throughout using SchemaVersionT = uint32_t; struct CollectionReadBuffers; +/** + * The SchemaEvolution registry holds evolution functions that allow to + * transform CollectionReadBuffers of known datatypes from a previous schema + * version to the current schema version. From the evolved buffers it is then + * possible to create collections. + * + * It is implemented as a singleton that is populated at the time shared + * datamodel libraries (or their schema evolution libraries) are loaded. It is + * assumed that this happens early on in the startup of any application, such + * that the registration still happens on a single thread. After this + * initialization evolutions can be done from multiple threads. + */ class SchemaEvolution { + /// The interface of any evolution function takes buffers and a version and + /// returns buffers. using EvolutionFuncT = std::function; + /// Each datatype gets its own version "map" where the index defines the + /// version from which the schema evolution has to start to end up in the + /// current version using EvolFuncVersionMapT = std::vector; - // Helper struct combining the current schema version of each type and an - // index into the schema evolution "map" below + /** + * Helper struct combining the current schema version of each type and an + * index into the schema evolution "map" below + */ struct MapIndex { - SchemaVersionT currentVersion; - size_t index; + SchemaVersionT currentVersion; ///< The current schema version for each type + size_t index; ///< The index in the evolution function map + /// Tombstone value indicating that no evolution function is available (yet) constexpr static size_t NoEvolutionAvailable = -1u; }; - // The map that holds the current version for each type that is known to the - // registry + /// The map that holds the current version for each type that is known to the + /// registry using VersionMapT = std::unordered_map; - // The "map" that holds all evolution functions + /// The "map" that holds all evolution functions using EvolutionMapT = std::vector; - // Tag struct for marking collection types that do not need schema evolution. - // Necessary due to the fact that we determine the current version from the - // number of schema evolution functions we have - struct NoSchemaEvolutionNecessaryT {}; - public: /** * Enum to make it possible to prioritize evolution functions during @@ -45,42 +61,85 @@ class SchemaEvolution { */ enum class Priority { AutoGenerated = 0, UserDefined = 1 }; - static constexpr auto NoSchemaEvolutionNecessary = NoSchemaEvolutionNecessaryT{}; - + /// The SchemaEvolution registry is a singleton so we disable all copy and + /// move constructors explicitly SchemaEvolution(const SchemaEvolution&) = delete; SchemaEvolution& operator=(const SchemaEvolution&) = delete; SchemaEvolution(SchemaEvolution&&) = delete; SchemaEvolution& operator=(SchemaEvolution&&) = delete; - ~SchemaEvolution() = default; + /// Mutable instance only used for the initial registration of functions + /// during library loading static SchemaEvolution& mutInstance(); + /// Get the instance for evolving buffers static SchemaEvolution const& instance(); - // TODO: Make this take a string_view + /** + * Evolve the passed in buffers to the current version of the datatype that + * can be constructed from them. + * + * Internally this will first check if the schema version of the buffers is + * already the current one and in that case immediately return the passed in + * buffers again as they do not need schema evolution. If that is not the case + * it will look up the correct evolution function for the passed in version + * and call that on the passed in buffers. + * + * @param oldBuffers The buffers to be evolved + * @param fromVersion The schema version of the buffers + * @param collType The fully qualified collection type + * + * @returns CollectionReadBuffers that have been evolved to the current + * version. NOTE that these could also be the input buffers. + */ podio::CollectionReadBuffers evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const; /** - * Register an evoution function. Make the priority UserDefine dy default, so - * that users do not have to add that argument, and instead make code - * generation put the AutoGenerated value there + * Register an evoution function for a given collection type and given + * versions from where to where the evolution applies. + * + * Several assumptions are in place here: + * + * - The current version has to be the same for all invocations for a given + * datatype. + * - An evolution function has to be registered for all possible versions from + * 1 to N - 1, where N is the current version + * - An evolution function can only be registerd once for a given datatype and + * fromVersion + * - For auto generated code the passed in priority has to be AutoGenerated + * otherwise it might override user defined functions + * - Even if a datatype does not require schema evolution it has to register + * an evolution function (e.g. the noOpSchemaEvolution below) in order to be + * known to the internal map. * - * NOTE: Currently necessary to fill the auto generated ones in the correct - * order of fromVersion (becoming larger). + * @param collType The fully qualified collection data type + * @param fromVersion The version from which this evolution function should + * apply + * @param currentVersion The current schema version for the data type + * @param evolutionFunc The evolution function that evolves passed in buffers + * from fromVersion to currrentVersion + * @param priority The priority of this evolution function. Defaults to + * UserDefined which overrides auto generated functionality. */ - void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, + void registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, SchemaVersionT currentVersion, const EvolutionFuncT& evolutionFunc, Priority priority = Priority::UserDefined); - void registerEvolutionFunc(const std::string& collType, NoSchemaEvolutionNecessaryT); - - // Register the current version - void registerCurrentVersion(const std::string& collType, SchemaVersionT currentVersion); + /** + * A no-op schema evolution function that returns the buffers unchanged. + * + * This can be used for registering an evolution function for datatypes that + * do not require schema evolution, but need to register themselves with the + * registry + */ + static podio::CollectionReadBuffers noOpSchemaEvolution(podio::CollectionReadBuffers buffers, SchemaVersionT); private: SchemaEvolution() = default; + /// The map containing types and MapIndex structs VersionMapT m_versionMapIndices{}; + /// The "map" holding the evolution functions EvolutionMapT m_evolutionFuncs{}; }; diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index c3f8d5e79..88a18b305 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -218,9 +218,6 @@ bool registerCollection() { auto& factory = podio::CollectionBufferFactory::mutInstance(); factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); - auto& schemaEvolution = podio::SchemaEvolution::mutInstance(); - schemaEvolution.registerCurrentVersion("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion); - return true; }(); return reg; diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index cae328a01..966d2e328 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -37,46 +37,42 @@ podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionRea } void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaVersionT fromVersion, - const EvolutionFuncT& evolutionFunc, Priority priority) { + SchemaVersionT currentVersion, const EvolutionFuncT& evolutionFunc, + Priority priority) { auto typeIt = m_versionMapIndices.find(collType); if (typeIt == m_versionMapIndices.end()) { - std::cerr << "PODIO ERROR: trying to register a schema evolution function for " << collType - << " which is not a type known to the schema evolution registry" << std::endl; - return; + // Create an entry for this type + std::tie(typeIt, std::ignore) = + m_versionMapIndices.emplace(collType, MapIndex{currentVersion, MapIndex::NoEvolutionAvailable}); } - auto& [currentVersion, mapIndex] = typeIt->second; - // If we do not have any evolution funcs yet, create the necessary mapping // structure and update the index if (typeIt->second.index == MapIndex::NoEvolutionAvailable) { - mapIndex = m_evolutionFuncs.size(); + typeIt->second.index = m_evolutionFuncs.size(); m_evolutionFuncs.emplace_back(EvolFuncVersionMapT{}); } + // From here on out we don't need the mutabale any longer + const auto& [_, mapIndex] = typeIt->second; + auto& versionMap = m_evolutionFuncs[mapIndex]; const auto prevSize = versionMap.size(); if (prevSize < fromVersion) { versionMap.resize(fromVersion); - for (auto i = prevSize; i < fromVersion; ++i) { - versionMap[i] = evolutionFunc; - } + versionMap[fromVersion - 1] = evolutionFunc; } else { if (priority == Priority::UserDefined) { - versionMap[fromVersion] = evolutionFunc; + versionMap[fromVersion - 1] = evolutionFunc; } else { std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; } } } -void SchemaEvolution::registerCurrentVersion(const std::string& collType, SchemaVersionT currentVersion) { - if (auto typeIt = m_versionMapIndices.find(collType); typeIt != m_versionMapIndices.end()) { - // TODO: warn about this? In principle all of this should only be called once - typeIt->second.currentVersion = currentVersion; - } - - m_versionMapIndices.emplace(collType, MapIndex{currentVersion, MapIndex::NoEvolutionAvailable}); +podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers buffers, + SchemaVersionT) { + return buffers; } } // namespace podio diff --git a/src/UserDataCollection.cc b/src/UserDataCollection.cc index 52b69ae49..c965f5b9c 100644 --- a/src/UserDataCollection.cc +++ b/src/UserDataCollection.cc @@ -34,8 +34,11 @@ namespace { }}; }); - podio::SchemaEvolution::mutInstance().registerCurrentVersion(podio::userDataCollTypeName(), - UserDataCollection::schemaVersion); + // For now passing the same schema version for from and current versions + // just to make SchemaEvolution aware of UserDataCollections. + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + podio::userDataCollTypeName(), UserDataCollection::schemaVersion, UserDataCollection::schemaVersion, + SchemaEvolution::noOpSchemaEvolution, SchemaEvolution::Priority::AutoGenerated); return 1; } From 64d148c6a62e5df2282ad868efae9d407ef1e807 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 14 Jun 2023 18:53:42 +0200 Subject: [PATCH 08/31] [clang-tidy] Mark inputs as const& for now --- include/podio/SchemaEvolution.h | 4 ++-- src/SchemaEvolution.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/podio/SchemaEvolution.h b/include/podio/SchemaEvolution.h index 37a97955d..6b50aa4f5 100644 --- a/include/podio/SchemaEvolution.h +++ b/include/podio/SchemaEvolution.h @@ -92,7 +92,7 @@ class SchemaEvolution { * @returns CollectionReadBuffers that have been evolved to the current * version. NOTE that these could also be the input buffers. */ - podio::CollectionReadBuffers evolveBuffers(podio::CollectionReadBuffers oldBuffers, SchemaVersionT fromVersion, + podio::CollectionReadBuffers evolveBuffers(const podio::CollectionReadBuffers& oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const; /** @@ -132,7 +132,7 @@ class SchemaEvolution { * do not require schema evolution, but need to register themselves with the * registry */ - static podio::CollectionReadBuffers noOpSchemaEvolution(podio::CollectionReadBuffers buffers, SchemaVersionT); + static podio::CollectionReadBuffers noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, SchemaVersionT); private: SchemaEvolution() = default; diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 966d2e328..2377e5833 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -14,7 +14,7 @@ SchemaEvolution const& SchemaEvolution::instance() { return mutInstance(); } -podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(podio::CollectionReadBuffers oldBuffers, +podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(const podio::CollectionReadBuffers& oldBuffers, SchemaVersionT fromVersion, const std::string& collType) const { if (const auto typeIt = m_versionMapIndices.find(collType); typeIt != m_versionMapIndices.end()) { @@ -70,7 +70,7 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV } } -podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers buffers, +podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, SchemaVersionT) { return buffers; } From 9e006fdf62543660f55b0535be3fef14e7c3463d Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 15 Jun 2023 09:47:32 +0200 Subject: [PATCH 09/31] Create schema_evolution test subdirectory and build old datamodel --- tests/CMakeLists.txt | 5 ++--- tests/schema_evolution/CMakeLists.txt | 14 ++++++++++++++ tests/{ => schema_evolution}/datalayout_old.yaml | 4 ++-- tests/{ => schema_evolution}/schema_evolution.yaml | 0 4 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 tests/schema_evolution/CMakeLists.txt rename tests/{ => schema_evolution}/datalayout_old.yaml (98%) rename tests/{ => schema_evolution}/schema_evolution.yaml (100%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index de0d282e6..394dee4bc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,9 +10,7 @@ SET(podio_PYTHON_DIR ${CMAKE_SOURCE_DIR}/python) PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} - OLD_DESCRIPTION datalayout_old.yaml - SCHEMA_EVOLUTION schema_evolution.yaml - ) + ) # Use the cmake building blocks to add the different parts (conditionally) PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel "${headers}" "${sources}") @@ -51,6 +49,7 @@ add_subdirectory(root_io) add_subdirectory(sio_io) add_subdirectory(unittests) add_subdirectory(dumpmodel) +add_subdirectory(schema_evolution) # Tests that don't fit into one of the broad categories above CREATE_PODIO_TEST(ostream_operator.cpp "") diff --git a/tests/schema_evolution/CMakeLists.txt b/tests/schema_evolution/CMakeLists.txt new file mode 100644 index 000000000..ef738f3cd --- /dev/null +++ b/tests/schema_evolution/CMakeLists.txt @@ -0,0 +1,14 @@ +PODIO_GENERATE_DATAMODEL(datamodel datalayout_old.yaml headers sources + IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old +) + +PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel_v1 "${headers}" "${sources}" + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old +) + +PODIO_ADD_ROOT_IO_DICT(TestDataModel_v1Dict TestDataModel_v1 "${headers}" ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old/src/selection.xml + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old +) + +PODIO_ADD_SIO_IO_BLOCKS(TestDataModel_v1 "${headers}" "${sources}") diff --git a/tests/datalayout_old.yaml b/tests/schema_evolution/datalayout_old.yaml similarity index 98% rename from tests/datalayout_old.yaml rename to tests/schema_evolution/datalayout_old.yaml index eea733ffa..b6aa605b8 100755 --- a/tests/datalayout_old.yaml +++ b/tests/schema_evolution/datalayout_old.yaml @@ -22,8 +22,8 @@ components : # can also add c'tors: ExtraCode : declaration: " - SimpleStruct() : x(0),y(0),z(0) {}\n - SimpleStruct( const int* v) : x(v[0]),y(v[1]),z(v[2]) {} + SimpleStruct() : x(0),z(0) {}\n + SimpleStruct( const int* v) : x(v[0]),z(v[2]) {} " NotSoSimpleStruct: diff --git a/tests/schema_evolution.yaml b/tests/schema_evolution/schema_evolution.yaml similarity index 100% rename from tests/schema_evolution.yaml rename to tests/schema_evolution/schema_evolution.yaml From 14c201031fa9a8c1cb8f1ce53ce32ea618dee219 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 15 Jun 2023 11:15:55 +0200 Subject: [PATCH 10/31] Add first simple tests for "trivial" schema evolution --- tests/CTestCustom.cmake | 3 + tests/schema_evolution/CMakeLists.txt | 20 +++++++ tests/schema_evolution/README.md | 18 ++++++ tests/schema_evolution/datalayout_old.yaml | 1 - tests/schema_evolution/read_new_data.h | 51 +++++++++++++++++ tests/schema_evolution/root_io/CMakeLists.txt | 4 ++ .../root_io/read_new_data_root.cpp | 7 +++ .../root_io/write_old_data_root.cpp | 7 +++ tests/schema_evolution/write_old_data.h | 55 +++++++++++++++++++ 9 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 tests/schema_evolution/README.md create mode 100644 tests/schema_evolution/read_new_data.h create mode 100644 tests/schema_evolution/root_io/CMakeLists.txt create mode 100644 tests/schema_evolution/root_io/read_new_data_root.cpp create mode 100644 tests/schema_evolution/root_io/write_old_data_root.cpp create mode 100644 tests/schema_evolution/write_old_data.h diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index f435ebc7c..5dd3d86d9 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -58,6 +58,9 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ datamodel_def_store_roundtrip_root_extension datamodel_def_store_roundtrip_sio datamodel_def_store_roundtrip_sio_extension + + write_old_data_root + read_new_data_root ) foreach(version in @legacy_versions@) diff --git a/tests/schema_evolution/CMakeLists.txt b/tests/schema_evolution/CMakeLists.txt index ef738f3cd..e76867a13 100644 --- a/tests/schema_evolution/CMakeLists.txt +++ b/tests/schema_evolution/CMakeLists.txt @@ -12,3 +12,23 @@ PODIO_ADD_ROOT_IO_DICT(TestDataModel_v1Dict TestDataModel_v1 "${headers}" ${CMAK ) PODIO_ADD_SIO_IO_BLOCKS(TestDataModel_v1 "${headers}" "${sources}") + +#--- Helper function to create a test in an environment that is suitable to +#--- write data in an old schema version +function(PODIO_CREATE_WRITE_OLD_DATA_TEST sourcefile additional_libs) + string( REPLACE ".cpp" "" name ${sourcefile} ) + add_executable( ${name} ${sourcefile} ) + add_test(NAME ${name} COMMAND ${name}) + + target_link_libraries(${name} PRIVATE TestDataModel_v1 ${additional_libs}) + target_include_directories(${name} PRIVATE ${CMAKE_SOURCE_DIR}/tests/schema_evolution) + + set_property(TEST ${name} + PROPERTY ENVIRONMENT + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/src:${CMAKE_CURRENT_BINARY_DIR}:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + PODIO_SIO_BLOCK=${CMAKE_CURRENT_BINARY_DIR} + ROOT_INCLUDE_PATH=${CMAKE_CURRENT_BINARY_DIR}/datamodel_old:${CMAKE_SOURCE_DIR}/include + ) +endfunction() + +add_subdirectory(root_io) diff --git a/tests/schema_evolution/README.md b/tests/schema_evolution/README.md new file mode 100644 index 000000000..0b55945ab --- /dev/null +++ b/tests/schema_evolution/README.md @@ -0,0 +1,18 @@ +# Schema Evolution tests +This folder contains tests for the schema evolution functionality in podio. The +functionality is tested by first writing data with an old schema version and +then reading in with the current schema version. +[`datalayout_old.yaml`](./datalayout_old.yaml) holds the definition of the old +version, using schema version 1, while the +[`datalayout.yaml`](../datalayout.yaml) that is also used for the other I/O +tests is used as the current version (schema version 2). + +## Differences between the two schema versions +Since it is not immediately visible from the test code this list contains the +differences between the two schema versions, and also how this evolution is +tested (if it is supported) + +| component / datatype | difference from v1 to v2 | purpose of test | tested with | +|--|--|--|--| +| `SimpleStruct` | no `int y` member in v1 | Addition of new members in components | As member of `ExampleWithArrayComponent` | +| `ExampleHit` | no `double energy` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | diff --git a/tests/schema_evolution/datalayout_old.yaml b/tests/schema_evolution/datalayout_old.yaml index b6aa605b8..514a75b72 100755 --- a/tests/schema_evolution/datalayout_old.yaml +++ b/tests/schema_evolution/datalayout_old.yaml @@ -71,7 +71,6 @@ datatypes : - double x // x-coordinate - double y // y-coordinate - double z // z-coordinate - - double energy // measured energy deposit ExampleMC : Description : "Example MC-particle" diff --git a/tests/schema_evolution/read_new_data.h b/tests/schema_evolution/read_new_data.h new file mode 100644 index 000000000..e31e3c3e8 --- /dev/null +++ b/tests/schema_evolution/read_new_data.h @@ -0,0 +1,51 @@ +#ifndef PODIO_TESTS_SCHEMAEVOLUTION_READNEWDATA_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_SCHEMAEVOLUTION_READNEWDATA_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithArrayComponentCollection.h" + +#include "podio/Frame.h" + +#include +#include + +int readSimpleStruct(const podio::Frame& event) { + const auto& coll = event.get("simpleStructTest"); + auto elem = coll[0]; + const auto sstruct = elem.s(); + + if (sstruct.y != 0 || sstruct.x != 42 || sstruct.z != 123) { + return 1; + } + return 0; +} + +int readExampleHit(const podio::Frame& event) { + const auto& coll = event.get("datatypeMemberAdditionTest"); + auto elem = coll[0]; + + if (elem.energy() != 0) { + return 1; + } + if (elem.x() != 1.23 || elem.y() != 1.23 || elem.z() != 1.23 || elem.cellID() != 0xcaffee) { + return 1; + } + + return 0; +} + +template +int read_new_data(const std::string& filename) { + ReaderT reader{}; + reader.openFile(filename); + + const auto event = podio::Frame(reader.readEntry("events", 0)); + + int result = 0; + result += readSimpleStruct(event); + result += readExampleHit(event); + + return result; +} + +#endif // PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H diff --git a/tests/schema_evolution/root_io/CMakeLists.txt b/tests/schema_evolution/root_io/CMakeLists.txt new file mode 100644 index 000000000..e756cc01a --- /dev/null +++ b/tests/schema_evolution/root_io/CMakeLists.txt @@ -0,0 +1,4 @@ +PODIO_CREATE_WRITE_OLD_DATA_TEST(write_old_data_root.cpp "TestDataModel_v1Dict;podio::podioRootIO") + +CREATE_PODIO_TEST(read_new_data_root.cpp "TestDataModelDict;podioRootIO") +set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root) diff --git a/tests/schema_evolution/root_io/read_new_data_root.cpp b/tests/schema_evolution/root_io/read_new_data_root.cpp new file mode 100644 index 000000000..8bf874e78 --- /dev/null +++ b/tests/schema_evolution/root_io/read_new_data_root.cpp @@ -0,0 +1,7 @@ +#include "schema_evolution/read_new_data.h" + +#include "podio/ROOTFrameReader.h" + +int main() { + return read_new_data("example_data_old_schema.root"); +} diff --git a/tests/schema_evolution/root_io/write_old_data_root.cpp b/tests/schema_evolution/root_io/write_old_data_root.cpp new file mode 100644 index 000000000..c5fc23880 --- /dev/null +++ b/tests/schema_evolution/root_io/write_old_data_root.cpp @@ -0,0 +1,7 @@ +#include "write_old_data.h" + +#include "podio/ROOTFrameWriter.h" + +int main() { + return writeData("example_data_old_schema.root"); +} diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h new file mode 100644 index 000000000..651359c90 --- /dev/null +++ b/tests/schema_evolution/write_old_data.h @@ -0,0 +1,55 @@ +#ifndef PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithArrayComponentCollection.h" + +#include "podio/Frame.h" + +#include + +/// This is a datatype that holds a SimpleStruct component +auto writeSimpleStruct() { + ExampleWithArrayComponentCollection coll; + auto elem = coll.create(); + auto sstruct = SimpleStruct(); + sstruct.x = 42; + sstruct.z = 123; + elem.s(sstruct); + + return coll; +} + +auto writeExampleHit() { + ExampleHitCollection coll; + auto elem = coll.create(); + elem.x(1.23); + elem.y(1.23); + elem.z(1.23); + elem.cellID(0xcaffee); + + return coll; +} + +podio::Frame createFrame() { + podio::Frame event; + + event.put(writeSimpleStruct(), "simpleStructTest"); + event.put(writeExampleHit(), "dataTypeMemberAdditionTest"); + + return event; +} + +template +int writeData(const std::string& filename) { + WriterT writer(filename); + + auto event = createFrame(); + writer.writeFrame(event, "events"); + + writer.finish(); + + return 0; +} + +#endif // PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H From 3ccd157fb2da66bd6d239da3f8385705151dd2e0 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 15 Jun 2023 16:19:10 +0200 Subject: [PATCH 11/31] Fix test environment and typo --- python/templates/Collection.cc.jinja2 | 10 ++++++++++ tests/schema_evolution/CMakeLists.txt | 4 ++-- tests/schema_evolution/write_old_data.h | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 88a18b305..f7b60c586 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -218,6 +218,16 @@ bool registerCollection() { auto& factory = podio::CollectionBufferFactory::mutInstance(); factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); + // Make the SchemaEvolution registry aware of the current version by + // registering a no-op function with it + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + "{{ class.full_type }}Collection", + {{ package_name }}::meta::schemaVersion, + {{ package_name }}::meta::schemaVersion, + podio::SchemaEvolution::noOpSchemaEvolution, + podio::SchemaEvolution::Priority::AutoGenerated + ); + return true; }(); return reg; diff --git a/tests/schema_evolution/CMakeLists.txt b/tests/schema_evolution/CMakeLists.txt index e76867a13..3a6ecc659 100644 --- a/tests/schema_evolution/CMakeLists.txt +++ b/tests/schema_evolution/CMakeLists.txt @@ -25,9 +25,9 @@ function(PODIO_CREATE_WRITE_OLD_DATA_TEST sourcefile additional_libs) set_property(TEST ${name} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/src:${CMAKE_CURRENT_BINARY_DIR}:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/src:${CMAKE_BINARY_DIR}/tests/schema_evolution:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PODIO_SIO_BLOCK=${CMAKE_CURRENT_BINARY_DIR} - ROOT_INCLUDE_PATH=${CMAKE_CURRENT_BINARY_DIR}/datamodel_old:${CMAKE_SOURCE_DIR}/include + ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/schema_evolution/datamodel_old:${CMAKE_SOURCE_DIR}/include ) endfunction() diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h index 651359c90..3367f2fa9 100644 --- a/tests/schema_evolution/write_old_data.h +++ b/tests/schema_evolution/write_old_data.h @@ -35,7 +35,7 @@ podio::Frame createFrame() { podio::Frame event; event.put(writeSimpleStruct(), "simpleStructTest"); - event.put(writeExampleHit(), "dataTypeMemberAdditionTest"); + event.put(writeExampleHit(), "datatypeMemberAdditionTest"); return event; } From 557f34795bb76c7704622a960b674ed43528e6fa Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 20 Jun 2023 20:00:27 +0200 Subject: [PATCH 12/31] Add failing test for renamed member variables --- tests/schema_evolution/README.md | 1 + tests/schema_evolution/read_new_data.h | 36 ++++++++++++++++++------- tests/schema_evolution/write_old_data.h | 11 ++++++++ 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/tests/schema_evolution/README.md b/tests/schema_evolution/README.md index 0b55945ab..4ed849b48 100644 --- a/tests/schema_evolution/README.md +++ b/tests/schema_evolution/README.md @@ -16,3 +16,4 @@ tested (if it is supported) |--|--|--|--| | `SimpleStruct` | no `int y` member in v1 | Addition of new members in components | As member of `ExampleWithArrayComponent` | | `ExampleHit` | no `double energy` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | +| `ex2::NamespaceStruct` | renaming of `y_old` to `y` | Renaming of member variables | As member of `ex42::ExampleWithNamespace` | diff --git a/tests/schema_evolution/read_new_data.h b/tests/schema_evolution/read_new_data.h index e31e3c3e8..59ed31550 100644 --- a/tests/schema_evolution/read_new_data.h +++ b/tests/schema_evolution/read_new_data.h @@ -3,20 +3,28 @@ #include "datamodel/ExampleHitCollection.h" #include "datamodel/ExampleWithArrayComponentCollection.h" +#include "datamodel/ExampleWithNamespaceCollection.h" #include "podio/Frame.h" #include #include +#define ASSERT_EQUAL(actual, expected, msg) \ + if ((expected) != (actual)) { \ + std::cerr << __PRETTY_FUNCTION__ << ": " << msg << " (expected: " << expected << ", actual: " << actual << ")"; \ + return 1; \ + } + int readSimpleStruct(const podio::Frame& event) { const auto& coll = event.get("simpleStructTest"); auto elem = coll[0]; const auto sstruct = elem.s(); - if (sstruct.y != 0 || sstruct.x != 42 || sstruct.z != 123) { - return 1; - } + ASSERT_EQUAL(sstruct.y, 0, "New component member not 0 initialized"); + ASSERT_EQUAL(sstruct.x, 42, "Existing component member changed"); + ASSERT_EQUAL(sstruct.z, 123, "Existing component member changed"); + return 0; } @@ -24,12 +32,21 @@ int readExampleHit(const podio::Frame& event) { const auto& coll = event.get("datatypeMemberAdditionTest"); auto elem = coll[0]; - if (elem.energy() != 0) { - return 1; - } - if (elem.x() != 1.23 || elem.y() != 1.23 || elem.z() != 1.23 || elem.cellID() != 0xcaffee) { - return 1; - } + ASSERT_EQUAL(elem.energy(), 0, "New datatype member variable not 0 initialized"); + ASSERT_EQUAL(elem.x(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.y(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.z(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.cellID(), 0xcaffee, "Member variables unrelated to schema evolution have changed"); + + return 0; +} + +int readExampleWithNamespace(const podio::Frame& event) { + const auto& coll = event.get("componentMemberRenameTest"); + auto elem = coll[0]; + + ASSERT_EQUAL(elem.y(), 42, "Renamed component member variable does not have the expected value"); + ASSERT_EQUAL(elem.x(), 123, "Member variables unrelated to schema evolution have changed"); return 0; } @@ -44,6 +61,7 @@ int read_new_data(const std::string& filename) { int result = 0; result += readSimpleStruct(event); result += readExampleHit(event); + result += readExampleWithNamespace(event); return result; } diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h index 3367f2fa9..9232e59af 100644 --- a/tests/schema_evolution/write_old_data.h +++ b/tests/schema_evolution/write_old_data.h @@ -3,6 +3,7 @@ #include "datamodel/ExampleHitCollection.h" #include "datamodel/ExampleWithArrayComponentCollection.h" +#include "datamodel/ExampleWithNamespaceCollection.h" #include "podio/Frame.h" @@ -31,11 +32,21 @@ auto writeExampleHit() { return coll; } +auto writeExampleWithNamespace() { + ex42::ExampleWithNamespaceCollection coll; + auto elem = coll.create(); + elem.y_old(42); + elem.x(123); + + return coll; +} + podio::Frame createFrame() { podio::Frame event; event.put(writeSimpleStruct(), "simpleStructTest"); event.put(writeExampleHit(), "datatypeMemberAdditionTest"); + event.put(writeExampleWithNamespace(), "componentMemberRenameTest"); return event; } From b65c6eecde5101834497cd71a98881d5fb19bee4 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Thu, 6 Jul 2023 10:05:38 +0200 Subject: [PATCH 13/31] move Collection::createBuffers template into macro --- python/templates/Collection.cc.jinja2 | 46 +------------------ python/templates/macros/collections.jinja2 | 52 +++++++++++++++++++++- tests/CMakeLists.txt | 2 + 3 files changed, 54 insertions(+), 46 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 9e8b5429b..f8a62432e 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -165,51 +165,7 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { return {{ package_name }}::meta::schemaVersion; } -// anonymous namespace for registration with the CollectionBufferFactory. This -// ensures that we don't have to make up arbitrary namespace names here, since -// none of this is publicly visible -namespace { -podio::CollectionReadBuffers createBuffers(bool isSubset) { - auto readBuffers = podio::CollectionReadBuffers{}; - readBuffers.type = "{{ class.full_type }}Collection"; - readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; - readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; - - // The number of ObjectID vectors is either 1 or the sum of OneToMany and - // OneToOne relations - const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; - readBuffers.references = new podio::CollRefCollection(nRefs); - for (auto& ref : *readBuffers.references) { - // Make sure to place usable buffer pointers here - ref = std::make_unique>(); - } - - readBuffers.vectorMembers = new podio::VectorMembersInfo(); - if (!isSubset) { - readBuffers.vectorMembers->reserve({{ VectorMembers | length }}); -{% for member in VectorMembers %} - readBuffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>); -{% endfor %} - } - - readBuffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { - {{ collection_type }}Data data(buffers, isSubsetColl); - return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); - }; - - readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { - if (buffers.data) { - buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); - } -{% if VectorMembers %} -{% for member in VectorMembers %} - (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); -{% endfor %} -{% endif %} - }; - - return readBuffers; -} + {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }} // The usual trick with an IIFE and a static variable inside a funtion and then // making sure to call that function during shared library loading diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index d07abad19..d1159bcb8 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -151,4 +151,54 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const os.flush(); } } -{%- endmacro %} +{% endmacro %} + +{% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %} + +// anonymous namespace for registration with the CollectionBufferFactory. This +// ensures that we don't have to make up arbitrary namespace names here, since +// none of this is publicly visible +namespace { +podio::CollectionReadBuffers createBuffers(bool isSubset) { + auto readBuffers = podio::CollectionReadBuffers{}; + readBuffers.type = "{{ class.full_type }}Collection"; + readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; + readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; + + // The number of ObjectID vectors is either 1 or the sum of OneToMany and + // OneToOne relations + const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; + readBuffers.references = new podio::CollRefCollection(nRefs); + for (auto& ref : *readBuffers.references) { + // Make sure to place usable buffer pointers here + ref = std::make_unique>(); + } + + readBuffers.vectorMembers = new podio::VectorMembersInfo(); + if (!isSubset) { + readBuffers.vectorMembers->reserve({{ VectorMembers | length }}); +{% for member in VectorMembers %} + readBuffers.vectorMembers->emplace_back("{{ member.full_type }}", new std::vector<{{ member.full_type }}>); +{% endfor %} + } + + readBuffers.createCollection = [](podio::CollectionReadBuffers buffers, bool isSubsetColl) { + {{ collection_type }}Data data(buffers, isSubsetColl); + return std::make_unique<{{ collection_type }}>(std::move(data), isSubsetColl); + }; + + readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { + if (buffers.data) { + buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); + } +{% if VectorMembers %} +{% for member in VectorMembers %} + (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); +{% endfor %} +{% endif %} + }; + + return readBuffers; +} + +{% endmacro %} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 12cff4125..3490898fc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,6 +10,8 @@ SET(podio_PYTHON_DIR ${CMAKE_SOURCE_DIR}/python) PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} + OLD_DESCRIPTION schema_evolution/datalayout_old.yaml + SCHEMA_EVOLUTION schema_evolution/schema_evolution.yaml ) # Use the cmake building blocks to add the different parts (conditionally) From 2807c55600d2d7c64cabb8881ea5696a224f72bb Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Fri, 7 Jul 2023 22:56:00 +0200 Subject: [PATCH 14/31] creating components and datatypes for explicit schema evolution --- python/podio_class_generator.py | 87 ++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 4f86f2a45..819567a22 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """Podio class generator script""" +import copy import os import sys import subprocess @@ -16,6 +17,7 @@ from podio.podio_config_reader import PodioConfigReader from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder from podio_schema_evolution import DataModelComparator # dealing with cyclic imports +from podio_schema_evolution import RenamedMember, root_filter THIS_DIR = os.path.dirname(os.path.abspath(__file__)) TEMPLATE_DIR = os.path.join(THIS_DIR, 'templates') @@ -89,9 +91,14 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr # schema evolution specific code self.old_yamlfile = old_description self.evolution_file = evolution_file + self.old_schema_version = None self.old_datamodel = None self.old_datamodels_components = set() self.old_datamodels_datatypes = set() + self.root_schema_evolution_dict = {} # containing the root relevant schema evolution per datatype + # information to update the selection.xml + self.root_schema_evolution_component_names = set() + self.root_schema_evolution_datatype_names = set() try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) @@ -115,9 +122,10 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr def process(self): """Run the actual generation""" + self.preprocess_schema_evolution() + for name, component in self.datamodel.components.items(): self._process_component(name, component) - for name, datatype in self.datamodel.datatypes.items(): self._process_datatype(name, datatype) @@ -127,11 +135,10 @@ def process(self): self._create_selection_xml() self._write_cmake_lists_file() - self.process_schema_evolution() self.print_report() - def process_schema_evolution(self): + def preprocess_schema_evolution(self): """Process the schema evolution""" # have to make all necessary comparisons # which are the ones that changed? @@ -141,7 +148,7 @@ def process_schema_evolution(self): evolution_file=self.evolution_file) comparator.read() comparator.compare() - + self.old_schema_version = "v%i" % comparator.datamodel_old.schema_version # some sanity checks if len(comparator.errors) > 0: print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \ @@ -156,6 +163,17 @@ def process_schema_evolution(self): print(warning) sys.exit(-1) + # now go through all the io_handlers and see what we have to do + if 'ROOT' in self.io_handlers: + for item in root_filter(comparator.schema_changes): + schema_evolutions = self.root_schema_evolution_dict.get(item.klassname) + if (schema_evolutions is None): + schema_evolutions = [] + self.root_schema_evolution_dict[item.klassname] = schema_evolutions + + # add whatever is relevant to our ROOT schema evolutions + self.root_schema_evolution_dict[item.klassname].append(item) + def print_report(self): """Print a summary report about the generated code""" if not self.verbose: @@ -248,12 +266,69 @@ def _process_component(self, name, component): component['includes'] = self._sort_includes(includes) component['class'] = DataType(name) - self._fill_templates('Component', component) + # Add potentially older schema for schema evolution + # based on ROOT capabilities for now + if name in self.root_schema_evolution_dict.keys(): + schema_evolutions = self.root_schema_evolution_dict[name] + component = copy.deepcopy(component) + for schema_evolution in schema_evolutions: + if isinstance(schema_evolution, RenamedMember): + for member in component['Members']: + if member.name == schema_evolution.member_name_new: + member.name = schema_evolution.member_name_old + component['class'] = DataType(name + self.old_schema_version) + else: + raise NotImplementedError + self._fill_templates('Component', component) + self.root_schema_evolution_component_names.add(name + self.old_schema_version) + + def _replaceComponentInPaths(self, oldname, newname, paths): + # strip the namespace + shortoldname = oldname.split("::")[-1] + shortnewname = newname.split("::")[-1] + # and do the replace in place + for index, thePath in enumerate(paths): + if shortoldname in thePath: + newPath = thePath.replace(shortoldname, shortnewname) + paths[index] = newPath + def _process_datatype(self, name, definition): """Process one datatype""" datatype = self._preprocess_datatype(name, definition) + + # ROOT schema evolution preparation + # Compute and prepare the potential schema evolution parts + schema_evolution_datatype = copy.deepcopy(datatype) + needs_schema_evolution = False + # check whether it has a renamed member + # if name in self.root_schema_evolution_dict.keys(): + # for member in schema_evolution_datatype['Members']: + # if + # then check for components with a renamed member + for member in schema_evolution_datatype['Members']: + if member.is_array: + if member.array_type in self.root_schema_evolution_dict.keys(): + needs_schema_evolution = True + self._replaceComponentInPaths(member.array_type, member.array_type + self.old_schema_version, + schema_evolution_datatype['includes_data']) + member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version) + member.array_type = member.array_type + self.old_schema_version + + else: + if member.full_type in self.root_schema_evolution_dict.keys(): + needs_schema_evolution = True + self._replaceComponentInPaths(member.full_type, member.full_type + self.old_schema_version, + schema_evolution_datatype['includes']) + member.full_type = member.full_type + self.old_schema_version + + if needs_schema_evolution: + print(" Preparing explicit schema evolution for %s" % (name)) + schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa + self._fill_templates('Data', schema_evolution_datatype) + self.root_schema_evolution_datatype_names.add(name + self.old_schema_version) + self._fill_templates('Data', datatype) self._fill_templates('Object', datatype) self._fill_templates('MutableObject', datatype) @@ -487,7 +562,7 @@ def _create_selection_xml(self): data = {'components': [DataType(c) for c in self.datamodel.components], 'datatypes': [DataType(d) for d in self.datamodel.datatypes], 'old_schema_components': [DataType(d) for d in - self.old_datamodels_datatypes | self.old_datamodels_components]} + self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names]} # noqa self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) def _build_include(self, member): From 20e30d0e08ad990e7478238097adf583888ba1b6 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Mon, 10 Jul 2023 17:38:04 +0200 Subject: [PATCH 15/31] add more schema evolution code generation --- python/podio_class_generator.py | 21 ++++++++++++++++----- python/podio_schema_evolution.py | 2 +- python/templates/Collection.cc.jinja2 | 18 +++++++++++++++++- python/templates/CollectionData.h.jinja2 | 5 +++++ python/templates/macros/collections.jinja2 | 15 +++++++++------ 5 files changed, 48 insertions(+), 13 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 819567a22..69f75c3e6 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -149,6 +149,7 @@ def preprocess_schema_evolution(self): comparator.read() comparator.compare() self.old_schema_version = "v%i" % comparator.datamodel_old.schema_version + self.old_schema_version_int = comparator.datamodel_old.schema_version # some sanity checks if len(comparator.errors) > 0: print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \ @@ -188,8 +189,15 @@ def print_report(self): print(summaryline) print() - def _eval_template(self, template, data): + def _eval_template(self, template, data, old_schema_data=None): """Fill the specified template""" + # merge the info of data and the old schema into a single dict + if old_schema_data: + data['OneToOneRelations_old'] = old_schema_data['OneToOneRelations'] + data['OneToManyRelations_old'] = old_schema_data['OneToManyRelations'] + data['VectorMembers_old'] = old_schema_data['VectorMembers'] + data['old_schema_version'] = self.old_schema_version_int + return self.env.get_template(template).render(data) def _write_file(self, name, content): @@ -239,7 +247,7 @@ def get_fn_format(tmpl): return fn_templates - def _fill_templates(self, template_base, data): + def _fill_templates(self, template_base, data, old_schema_data=None): """Fill the template and write the results to file""" # Update the passed data with some global things that are the same for all # files @@ -248,7 +256,7 @@ def _fill_templates(self, template_base, data): data['incfolder'] = self.incfolder for filename, template in self._get_filenames_templates(template_base, data['class'].bare_type): - self._write_file(filename, self._eval_template(template, data)) + self._write_file(filename, self._eval_template(template, data, old_schema_data)) def _process_component(self, name, component): """Process one component""" @@ -325,9 +333,12 @@ def _process_datatype(self, name, definition): if needs_schema_evolution: print(" Preparing explicit schema evolution for %s" % (name)) - schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa + schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa self._fill_templates('Data', schema_evolution_datatype) self.root_schema_evolution_datatype_names.add(name + self.old_schema_version) + self._fill_templates('Collection', datatype, schema_evolution_datatype) + else: + self._fill_templates('Collection', datatype) self._fill_templates('Data', datatype) self._fill_templates('Object', datatype) @@ -562,7 +573,7 @@ def _create_selection_xml(self): data = {'components': [DataType(c) for c in self.datamodel.components], 'datatypes': [DataType(d) for d in self.datamodel.datatypes], 'old_schema_components': [DataType(d) for d in - self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names]} # noqa + self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names]} # noqa self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) def _build_include(self, member): diff --git a/python/podio_schema_evolution.py b/python/podio_schema_evolution.py index 7bd5b676b..947a44da9 100755 --- a/python/podio_schema_evolution.py +++ b/python/podio_schema_evolution.py @@ -225,7 +225,7 @@ def heuristics_members(self, added_members, dropped_members, schema_changes): """make analysis of member changes in a given data type """ for dropped_member in dropped_members: added_members_in_definition = [member for member in added_members if - dropped_member.definition_name == member.definition_name] + dropped_member.definition_name == member.definition_name] for added_member in added_members_in_definition: if added_member.member.full_type == dropped_member.member.full_type: # this is a rename candidate. So let's see whether it has been explicitly declared by the user diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index f8a62432e..c3ec95b1f 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -165,7 +165,15 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { return {{ package_name }}::meta::schemaVersion; } - {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }} +// anonymous namespace for registration with the CollectionBufferFactory. This +// ensures that we don't have to make up arbitrary namespace names here, since +// none of this is publicly visible +namespace { + {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }} + +{% if old_schema_version is defined %} + {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }} +{% endif %} // The usual trick with an IIFE and a static variable inside a funtion and then // making sure to call that function during shared library loading @@ -184,6 +192,14 @@ bool registerCollection() { podio::SchemaEvolution::Priority::AutoGenerated ); +{% if old_schema_version is defined %} + // register a buffer creation function for the schema evolution buffer + factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); + + //Make the SchemaEvolution aware of any other non-trivial conversion + // TODO +{% endif %} + return true; }(); return reg; diff --git a/python/templates/CollectionData.h.jinja2 b/python/templates/CollectionData.h.jinja2 index 0eee7dabe..039fd1f4a 100644 --- a/python/templates/CollectionData.h.jinja2 +++ b/python/templates/CollectionData.h.jinja2 @@ -11,6 +11,11 @@ {{ include }} {% endfor %} +// schema evolution specific includes +{% if schema_evolution_data is defined %} +#include "{{ incfolder }}{{ schema_evolution_data }}Data" +{% endif %} + // podio specific includes #include "podio/CollectionBuffers.h" #include "podio/ICollectionProvider.h" diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index d1159bcb8..1195aed07 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -155,16 +155,19 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const {% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %} -// anonymous namespace for registration with the CollectionBufferFactory. This -// ensures that we don't have to make up arbitrary namespace names here, since -// none of this is publicly visible -namespace { +{% if schemaVersion == -1 %} podio::CollectionReadBuffers createBuffers(bool isSubset) { +{% else %} +podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) { +{% endif %} auto readBuffers = podio::CollectionReadBuffers{}; readBuffers.type = "{{ class.full_type }}Collection"; +{% if schemaVersion == -1 %} readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; - readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; - +{% else %} + readBuffers.schemaVersion = {{ schemaVersion }}; +{% endif %} + readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; //TODO: replace this part for schema evolution // The number of ObjectID vectors is either 1 or the sum of OneToMany and // OneToOne relations const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; From 5b4164311782b231e4c86ef3394d70100dee4748 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Fri, 1 Sep 2023 09:53:43 +0200 Subject: [PATCH 16/31] bump of schema version for testing. version 1 is already reserved for non-versioned legacy data --- tests/datalayout.yaml | 2 +- tests/schema_evolution/datalayout_old.yaml | 2 +- tests/schema_evolution/schema_evolution.yaml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 369d39b58..7f3eb8e79 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,5 +1,5 @@ --- -schema_version : 2 +schema_version : 3 options : # should getters / setters be prefixed with get / set? diff --git a/tests/schema_evolution/datalayout_old.yaml b/tests/schema_evolution/datalayout_old.yaml index 514a75b72..965563958 100755 --- a/tests/schema_evolution/datalayout_old.yaml +++ b/tests/schema_evolution/datalayout_old.yaml @@ -1,5 +1,5 @@ --- -schema_version : 1 +schema_version : 2 options : # should getters / setters be prefixed with get / set? diff --git a/tests/schema_evolution/schema_evolution.yaml b/tests/schema_evolution/schema_evolution.yaml index 561f36fbe..5f9bf55bc 100644 --- a/tests/schema_evolution/schema_evolution.yaml +++ b/tests/schema_evolution/schema_evolution.yaml @@ -1,6 +1,6 @@ --- -from_schema_version : 1 -to_schema_version : 2 +from_schema_version : 2 +to_schema_version : 3 evolutions: From afa0e4117970b92405792fb67ee70b715990ebcc Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Fri, 1 Sep 2023 09:56:36 +0200 Subject: [PATCH 17/31] add missing schema evolution pieces; prepare for ioread rules in reflex dicts --- python/templates/Collection.cc.jinja2 | 29 ++++++++++++++++------ python/templates/macros/collections.jinja2 | 3 ++- python/templates/selection.xml.jinja2 | 16 ++++++++++-- src/SchemaEvolution.cc | 27 ++++++++++---------- 4 files changed, 52 insertions(+), 23 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index c3ec95b1f..2b8e66478 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -9,6 +9,10 @@ #include "{{ incfolder }}{{ class.bare_type }}Collection.h" #include "{{ incfolder }}DatamodelDefinition.h" +{% if old_schema_version is defined %} +#include "{{ incfolder }}{{ class.bare_type }}v{{ old_schema_version }}Data.h" +{% endif %} + {% for include in includes_coll_cc %} {{ include }} {% endfor %} @@ -183,21 +187,32 @@ bool registerCollection() { factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers); // Make the SchemaEvolution aware of the current version by - // registering a no-op function with it + // registering a no-op function for this and all preceeding versions + // will be overriden whenever an explicit action is required + for (unsigned int schemaVersion=1; schemaVersion< {{ package_name }}::meta::schemaVersion+1; ++schemaVersion) { + podio::SchemaEvolution::mutInstance().registerEvolutionFunc( + "{{ class.full_type }}Collection", + schemaVersion, + {{ package_name }}::meta::schemaVersion, + podio::SchemaEvolution::noOpSchemaEvolution, + podio::SchemaEvolution::Priority::AutoGenerated + ); + } + +{% if old_schema_version is defined %} + // register a buffer creation function for the schema evolution buffer + // factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); //TODO + + //Make the SchemaEvolution aware of any other non-trivial conversion podio::SchemaEvolution::mutInstance().registerEvolutionFunc( "{{ class.full_type }}Collection", - {{ package_name }}::meta::schemaVersion, + {{ old_schema_version }}, {{ package_name }}::meta::schemaVersion, podio::SchemaEvolution::noOpSchemaEvolution, podio::SchemaEvolution::Priority::AutoGenerated ); -{% if old_schema_version is defined %} - // register a buffer creation function for the schema evolution buffer - factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); - //Make the SchemaEvolution aware of any other non-trivial conversion - // TODO {% endif %} return true; diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index 1195aed07..d2cf95567 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -164,10 +164,11 @@ podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) { readBuffers.type = "{{ class.full_type }}Collection"; {% if schemaVersion == -1 %} readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion; + readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; {% else %} readBuffers.schemaVersion = {{ schemaVersion }}; + readBuffers.data = isSubset ? nullptr : new std::vector<{{ class.bare_type }}v{{ schemaVersion }}Data>; {% endif %} - readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer; //TODO: replace this part for schema evolution // The number of ObjectID vectors is either 1 or the sum of OneToMany and // OneToOne relations const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }}; diff --git a/python/templates/selection.xml.jinja2 b/python/templates/selection.xml.jinja2 index b0da9ff74..61d9a7a9c 100644 --- a/python/templates/selection.xml.jinja2 +++ b/python/templates/selection.xml.jinja2 @@ -1,11 +1,19 @@ {% macro class_selection(class, prefix='', postfix='') %} {%- if class.namespace %} - + {%- else %} - + {%- endif %} {% endmacro %} +{% macro ioread(iorule) %} + + + +{% endmacro %} + @@ -31,4 +39,8 @@ {% endfor %} + +{% for iorule in iorules %} +{{ ioread(iorule) }} +{% endfor %} diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 2377e5833..7ea128ac6 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -24,15 +24,14 @@ podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(const podio::Collect } const auto& typeEvolFuncs = m_evolutionFuncs[mapIndex]; - if (fromVersion < typeEvolFuncs.size() - 1) { + if (fromVersion < typeEvolFuncs.size() ) { // Do we need this check? In principle we could ensure at registration // time that this is always guaranteed return typeEvolFuncs[fromVersion - 1](oldBuffers, fromVersion); } } - std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << std::endl; - // TODO: exception + std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << " from version " << fromVersion << std::endl; // TODO: exception return oldBuffers; } @@ -53,23 +52,25 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV m_evolutionFuncs.emplace_back(EvolFuncVersionMapT{}); } - // From here on out we don't need the mutabale any longer + // From here on out we don't need the mutable any longer const auto& [_, mapIndex] = typeIt->second; auto& versionMap = m_evolutionFuncs[mapIndex]; const auto prevSize = versionMap.size(); - if (prevSize < fromVersion) { - versionMap.resize(fromVersion); - versionMap[fromVersion - 1] = evolutionFunc; - } else { - if (priority == Priority::UserDefined) { - versionMap[fromVersion - 1] = evolutionFunc; - } else { - std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; - } + if (prevSize < currentVersion) { + versionMap.resize(currentVersion); } + versionMap[fromVersion - 1] = evolutionFunc; + // TODO: temporarily switching off UserDefined logic + //if (priority == Priority::UserDefined) { + // versionMap[fromVersion - 1] = evolutionFunc; + //} else { + // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + // } + //} } + podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, SchemaVersionT) { return buffers; From 39a42b6f2ebb793f8d1f47e1c918c278e805625a Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Fri, 1 Sep 2023 10:43:19 +0200 Subject: [PATCH 18/31] add code generation for reflex schema evolution --- python/podio_class_generator.py | 50 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 69f75c3e6..dfaf6436b 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -76,6 +76,9 @@ class IncludeFrom(IntEnum): INTERNAL = 1 # include from within the datamodel EXTERNAL = 2 # include from an upstream datamodel +class RootIoRule: + pass + class ClassGenerator: """The entry point for reading a datamodel definition and generating the @@ -99,6 +102,7 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr # information to update the selection.xml self.root_schema_evolution_component_names = set() self.root_schema_evolution_datatype_names = set() + self.root_schema_evolution_iorules = set() try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) @@ -132,6 +136,7 @@ def process(self): self._write_edm_def_file() if 'ROOT' in self.io_handlers: + self.prepare_iorules() self._create_selection_xml() self._write_cmake_lists_file() @@ -327,9 +332,22 @@ def _process_datatype(self, name, definition): else: if member.full_type in self.root_schema_evolution_dict.keys(): needs_schema_evolution = True + # prepare the ROOT I/O rule + print(member.full_type) + dir(member) +# iorule = RootIoRule() +# iorule.sourceClass = member.full_type +# iorule.targetClass = member.full_type +# iorule.version = 2 #self.schema_version.lstrip("v") +# iorule.target = "y" +# iorule.source = "int y_old" +# iorule.code = "std::cout<< onfile.y_old << y << std::endl; y = onfile.y_old;" +# if len(self.root_schema_evolution_iorules) == 0: +# self.root_schema_evolution_iorules.add(iorule) self._replaceComponentInPaths(member.full_type, member.full_type + self.old_schema_version, - schema_evolution_datatype['includes']) + schema_evolution_datatype['includes_data']) member.full_type = member.full_type + self.old_schema_version + member.bare_type = member.bare_type + self.old_schema_version if needs_schema_evolution: print(" Preparing explicit schema evolution for %s" % (name)) @@ -350,6 +368,29 @@ def _process_datatype(self, name, definition): if 'SIO' in self.io_handlers: self._fill_templates('SIOBlock', datatype) + def prepare_iorules(self): + """Prepare the IORules to be put in the Reflex dictionary""" + for type_name, schema_changes in self.root_schema_evolution_dict.items(): + for schema_change in schema_changes: + if type(schema_change) == RenamedMember: + # find out the type of the renamed member + component = self.datamodel.components[type_name] + for member in component["Members"]: + if member.name == schema_change.member_name_new: + member_type = member.full_type + + iorule = RootIoRule() + iorule.sourceClass = type_name + iorule.targetClass = type_name + iorule.version = self.old_schema_version.lstrip("v") + iorule.source = f'{member_type} {schema_change.member_name_old}' + iorule.target = schema_change.member_name_new + iorule.code = f'std::cout<< "reading {schema_change.member_name_old} with value " << onfile.{schema_change.member_name_old} << std::endl; {iorule.target} = onfile.{schema_change.member_name_old};' + self.root_schema_evolution_iorules.add(iorule) + else: + raise NotImplementedError("Schema evolution for this type not yet implemented") + + def _preprocess_for_obj(self, datatype): """Do the preprocessing that is necessary for the Obj classes""" fwd_declarations = defaultdict(list) @@ -570,10 +611,13 @@ def _needs_include(self, classname) -> IncludeFrom: def _create_selection_xml(self): """Create the selection xml that is necessary for ROOT I/O""" - data = {'components': [DataType(c) for c in self.datamodel.components], + data = {'version': self.datamodel.schema_version, + 'components': [DataType(c) for c in self.datamodel.components], 'datatypes': [DataType(d) for d in self.datamodel.datatypes], 'old_schema_components': [DataType(d) for d in - self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names]} # noqa + self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names], # noqa + 'iorules': self.root_schema_evolution_iorules} + self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) def _build_include(self, member): From 28ca56697b3d7e7a2d14ad7ae8445b26fd7b0f2c Mon Sep 17 00:00:00 2001 From: hegner Date: Mon, 11 Sep 2023 10:11:06 +0200 Subject: [PATCH 19/31] Update SchemaEvolution.cc --- src/SchemaEvolution.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 34d0718e7..23059f93f 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -61,13 +61,9 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV versionMap.resize(currentVersion); } versionMap[fromVersion - 1] = evolutionFunc; - // TODO: temporarily switching off UserDefined logic - //if (priority == Priority::UserDefined) { - // versionMap[fromVersion - 1] = evolutionFunc; - //} else { - // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; - // } - //} + if (priority != Priority::UserDefined) { + std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + } } From fab33e8c58e21a1a0b78d07dbebecf7f921999ad Mon Sep 17 00:00:00 2001 From: hegner Date: Mon, 11 Sep 2023 10:34:32 +0200 Subject: [PATCH 20/31] Update Collection.cc.jinja2 --- python/templates/Collection.cc.jinja2 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index fb92a18a7..286f454b4 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -179,9 +179,10 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { namespace { {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }} -{% if old_schema_version is defined %} - {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }} -{% endif %} +// SCHEMA EVOLUTION: Not yet required with only ROOT backend +// {% if old_schema_version is defined %} +// {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }} +// {% endif %} // The usual trick with an IIFE and a static variable inside a funtion and then // making sure to call that function during shared library loading @@ -205,6 +206,7 @@ bool registerCollection() { {% if old_schema_version is defined %} // register a buffer creation function for the schema evolution buffer + // SCHEMA EVOLUTION: Not yet required with only ROOT backend // factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); //TODO //Make the SchemaEvolution aware of any other non-trivial conversion From 178b5204efe00a35fbb1c2f961d1ec29d0f74c36 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Mon, 11 Sep 2023 11:14:28 +0200 Subject: [PATCH 21/31] disable currently unused schema evolution parts --- python/templates/Collection.cc.jinja2 | 2 ++ src/SchemaEvolution.cc | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 286f454b4..2021494a4 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -179,10 +179,12 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const { namespace { {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }} +{# // SCHEMA EVOLUTION: Not yet required with only ROOT backend // {% if old_schema_version is defined %} // {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }} // {% endif %} +#} // The usual trick with an IIFE and a static variable inside a funtion and then // making sure to call that function during shared library loading diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 23059f93f..ea14f5ca9 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -60,9 +60,10 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV if (prevSize < currentVersion) { versionMap.resize(currentVersion); } + versionMap[fromVersion - 1] = evolutionFunc; if (priority != Priority::UserDefined) { - std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; } } From 905b4738f9057cd15e949a2712261291c1717635 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Mon, 11 Sep 2023 12:30:22 +0200 Subject: [PATCH 22/31] address static code checker warnings --- .github/scripts/pylint.rc | 2 +- python/podio_class_generator.py | 69 +++++++++++++++++++-------------- src/SchemaEvolution.cc | 7 ++-- 3 files changed, 44 insertions(+), 34 deletions(-) diff --git a/.github/scripts/pylint.rc b/.github/scripts/pylint.rc index 2db65ccd7..4d1492c68 100644 --- a/.github/scripts/pylint.rc +++ b/.github/scripts/pylint.rc @@ -285,7 +285,7 @@ max-statements=50 max-parents=7 # Maximum number of attributes for a class (see R0902). -max-attributes=25 +max-attributes=30 # Minimum number of public methods for a class (see R0903). min-public-methods=0 diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index d5b617e81..a23b8ac4e 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -76,8 +76,16 @@ class IncludeFrom(IntEnum): INTERNAL = 1 # include from within the datamodel EXTERNAL = 2 # include from an upstream datamodel + class RootIoRule: - pass + """A placeholder IORule class""" + def __init__(self): + self.sourceClass = None + self.targetClass = None + self.version = None + self.source = None + self.target = None + self.code = None class ClassGenerator: @@ -95,14 +103,15 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr self.old_yamlfile = old_description self.evolution_file = evolution_file self.old_schema_version = None + self.old_schema_version_int = None self.old_datamodel = None self.old_datamodels_components = set() self.old_datamodels_datatypes = set() - self.root_schema_evolution_dict = {} # containing the root relevant schema evolution per datatype + self.root_schema_dict = {} # containing the root relevant schema evolution per datatype # information to update the selection.xml - self.root_schema_evolution_component_names = set() - self.root_schema_evolution_datatype_names = set() - self.root_schema_evolution_iorules = set() + self.root_schema_component_names = set() + self.root_schema_datatype_names = set() + self.root_schema_iorules = set() try: self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm) @@ -153,7 +162,7 @@ def preprocess_schema_evolution(self): evolution_file=self.evolution_file) comparator.read() comparator.compare() - self.old_schema_version = "v%i" % comparator.datamodel_old.schema_version + self.old_schema_version = f"v{comparator.datamodel_old.schema_version}" self.old_schema_version_int = comparator.datamodel_old.schema_version # some sanity checks if len(comparator.errors) > 0: @@ -171,14 +180,14 @@ def preprocess_schema_evolution(self): # now go through all the io_handlers and see what we have to do if 'ROOT' in self.io_handlers: - for item in root_filter(comparator.schema_changes): - schema_evolutions = self.root_schema_evolution_dict.get(item.klassname) - if (schema_evolutions is None): - schema_evolutions = [] - self.root_schema_evolution_dict[item.klassname] = schema_evolutions + for item in root_filter(comparator.schema_changes): + schema_evolutions = self.root_schema_dict.get(item.klassname) + if schema_evolutions is None: + schema_evolutions = [] + self.root_schema_dict[item.klassname] = schema_evolutions - # add whatever is relevant to our ROOT schema evolutions - self.root_schema_evolution_dict[item.klassname].append(item) + # add whatever is relevant to our ROOT schema evolutions + self.root_schema_dict[item.klassname].append(item) def print_report(self): """Print a summary report about the generated code""" @@ -283,8 +292,8 @@ def _process_component(self, name, component): # Add potentially older schema for schema evolution # based on ROOT capabilities for now - if name in self.root_schema_evolution_dict.keys(): - schema_evolutions = self.root_schema_evolution_dict[name] + if name in self.root_schema_dict: + schema_evolutions = self.root_schema_dict[name] component = copy.deepcopy(component) for schema_evolution in schema_evolutions: if isinstance(schema_evolution, RenamedMember): @@ -295,9 +304,10 @@ def _process_component(self, name, component): else: raise NotImplementedError self._fill_templates('Component', component) - self.root_schema_evolution_component_names.add(name + self.old_schema_version) + self.root_schema_component_names.add(name + self.old_schema_version) def _replaceComponentInPaths(self, oldname, newname, paths): + """Replace component in paths""" # strip the namespace shortoldname = oldname.split("::")[-1] shortnewname = newname.split("::")[-1] @@ -316,13 +326,13 @@ def _process_datatype(self, name, definition): schema_evolution_datatype = copy.deepcopy(datatype) needs_schema_evolution = False # check whether it has a renamed member - # if name in self.root_schema_evolution_dict.keys(): + # if name in self.root_schema_dict.keys(): # for member in schema_evolution_datatype['Members']: # if # then check for components with a renamed member for member in schema_evolution_datatype['Members']: if member.is_array: - if member.array_type in self.root_schema_evolution_dict.keys(): + if member.array_type in self.root_schema_dict: needs_schema_evolution = True self._replaceComponentInPaths(member.array_type, member.array_type + self.old_schema_version, schema_evolution_datatype['includes_data']) @@ -330,7 +340,7 @@ def _process_datatype(self, name, definition): member.array_type = member.array_type + self.old_schema_version else: - if member.full_type in self.root_schema_evolution_dict.keys(): + if member.full_type in self.root_schema_dict: needs_schema_evolution = True # prepare the ROOT I/O rule print(member.full_type) @@ -342,18 +352,18 @@ def _process_datatype(self, name, definition): # iorule.target = "y" # iorule.source = "int y_old" # iorule.code = "std::cout<< onfile.y_old << y << std::endl; y = onfile.y_old;" -# if len(self.root_schema_evolution_iorules) == 0: -# self.root_schema_evolution_iorules.add(iorule) +# if len(self.root_schema_iorules) == 0: +# self.root_schema_iorules.add(iorule) self._replaceComponentInPaths(member.full_type, member.full_type + self.old_schema_version, schema_evolution_datatype['includes_data']) member.full_type = member.full_type + self.old_schema_version member.bare_type = member.bare_type + self.old_schema_version if needs_schema_evolution: - print(" Preparing explicit schema evolution for %s" % (name)) + print(f" Preparing explicit schema evolution for {name}") schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa self._fill_templates('Data', schema_evolution_datatype) - self.root_schema_evolution_datatype_names.add(name + self.old_schema_version) + self.root_schema_datatype_names.add(name + self.old_schema_version) self._fill_templates('Collection', datatype, schema_evolution_datatype) else: self._fill_templates('Collection', datatype) @@ -370,9 +380,9 @@ def _process_datatype(self, name, definition): def prepare_iorules(self): """Prepare the IORules to be put in the Reflex dictionary""" - for type_name, schema_changes in self.root_schema_evolution_dict.items(): + for type_name, schema_changes in self.root_schema_dict.items(): for schema_change in schema_changes: - if type(schema_change) == RenamedMember: + if isinstance(schema_change, RenamedMember): # find out the type of the renamed member component = self.datamodel.components[type_name] for member in component["Members"]: @@ -385,12 +395,11 @@ def prepare_iorules(self): iorule.version = self.old_schema_version.lstrip("v") iorule.source = f'{member_type} {schema_change.member_name_old}' iorule.target = schema_change.member_name_new - iorule.code = f'std::cout<< "reading {schema_change.member_name_old} with value " << onfile.{schema_change.member_name_old} << std::endl; {iorule.target} = onfile.{schema_change.member_name_old};' - self.root_schema_evolution_iorules.add(iorule) + iorule.code = f'std::cout<< "reading {schema_change.member_name_old} with value " << onfile.{schema_change.member_name_old} << std::endl; {iorule.target} = onfile.{schema_change.member_name_old};' # noqa + self.root_schema_iorules.add(iorule) else: raise NotImplementedError("Schema evolution for this type not yet implemented") - def _preprocess_for_obj(self, datatype): """Do the preprocessing that is necessary for the Obj classes""" fwd_declarations = defaultdict(list) @@ -615,8 +624,8 @@ def _create_selection_xml(self): 'components': [DataType(c) for c in self.datamodel.components], 'datatypes': [DataType(d) for d in self.datamodel.datatypes], 'old_schema_components': [DataType(d) for d in - self.root_schema_evolution_datatype_names | self.root_schema_evolution_component_names], # noqa - 'iorules': self.root_schema_evolution_iorules} + self.root_schema_datatype_names | self.root_schema_component_names], # noqa + 'iorules': self.root_schema_iorules} self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data)) diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index ea14f5ca9..8a42e074a 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -24,14 +24,15 @@ podio::CollectionReadBuffers SchemaEvolution::evolveBuffers(const podio::Collect } const auto& typeEvolFuncs = m_evolutionFuncs[mapIndex]; - if (fromVersion < typeEvolFuncs.size() ) { + if (fromVersion < typeEvolFuncs.size()) { // Do we need this check? In principle we could ensure at registration // time that this is always guaranteed return typeEvolFuncs[fromVersion - 1](oldBuffers, fromVersion); } } - std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType << " from version " << fromVersion << std::endl; // TODO: exception + std::cerr << "PODIO WARNING: evolveBuffers has no knowledge of how to evolve buffers for " << collType + << " from version " << fromVersion << std::endl; // TODO: exception return oldBuffers; } @@ -63,7 +64,7 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV versionMap[fromVersion - 1] = evolutionFunc; if (priority != Priority::UserDefined) { - // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; + // std::cerr << "Not updating evolution function because priority is not UserDefined" << std::endl; } } From 6b8fe156308161289e212b67e1bfc2e2d5dea4b4 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Mon, 11 Sep 2023 16:35:00 +0200 Subject: [PATCH 23/31] Fix bug re-introduced in merging master --- python/templates/macros/collections.jinja2 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/templates/macros/collections.jinja2 b/python/templates/macros/collections.jinja2 index d2cf95567..ae3d5687d 100644 --- a/python/templates/macros/collections.jinja2 +++ b/python/templates/macros/collections.jinja2 @@ -192,14 +192,15 @@ podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) { }; readBuffers.recast = [](podio::CollectionReadBuffers& buffers) { + // We only have any of these buffers if this is not a subset collection if (buffers.data) { buffers.data = podio::CollectionWriteBuffers::asVector<{{ class.full_type }}Data>(buffers.data); - } {% if VectorMembers %} {% for member in VectorMembers %} - (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); + (*buffers.vectorMembers)[{{ loop.index0 }}].second = podio::CollectionWriteBuffers::asVector<{{ member.full_type }}>((*buffers.vectorMembers)[{{ loop.index0 }}].second); {% endfor %} {% endif %} + } }; return readBuffers; From 51462d9321aedcff4dd7726471fa56d2b74d54cb Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Mon, 11 Sep 2023 22:41:01 +0200 Subject: [PATCH 24/31] addressing PR comments --- python/podio_class_generator.py | 20 ++++++-------------- python/podio_schema_evolution.py | 11 +++++++++++ src/SchemaEvolution.cc | 1 - 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index a23b8ac4e..2063280c6 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -306,8 +306,9 @@ def _process_component(self, name, component): self._fill_templates('Component', component) self.root_schema_component_names.add(name + self.old_schema_version) - def _replaceComponentInPaths(self, oldname, newname, paths): - """Replace component in paths""" + @staticmethod + def _replace_component_in_paths(oldname, newname, paths): + """Replace component name by another one in existing paths""" # strip the namespace shortoldname = oldname.split("::")[-1] shortnewname = newname.split("::")[-1] @@ -334,7 +335,7 @@ def _process_datatype(self, name, definition): if member.is_array: if member.array_type in self.root_schema_dict: needs_schema_evolution = True - self._replaceComponentInPaths(member.array_type, member.array_type + self.old_schema_version, + self._replace_component_in_paths(member.array_type, member.array_type + self.old_schema_version, schema_evolution_datatype['includes_data']) member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version) member.array_type = member.array_type + self.old_schema_version @@ -345,16 +346,7 @@ def _process_datatype(self, name, definition): # prepare the ROOT I/O rule print(member.full_type) dir(member) -# iorule = RootIoRule() -# iorule.sourceClass = member.full_type -# iorule.targetClass = member.full_type -# iorule.version = 2 #self.schema_version.lstrip("v") -# iorule.target = "y" -# iorule.source = "int y_old" -# iorule.code = "std::cout<< onfile.y_old << y << std::endl; y = onfile.y_old;" -# if len(self.root_schema_iorules) == 0: -# self.root_schema_iorules.add(iorule) - self._replaceComponentInPaths(member.full_type, member.full_type + self.old_schema_version, + self._replace_component_in_paths(member.full_type, member.full_type + self.old_schema_version, schema_evolution_datatype['includes_data']) member.full_type = member.full_type + self.old_schema_version member.bare_type = member.bare_type + self.old_schema_version @@ -395,7 +387,7 @@ def prepare_iorules(self): iorule.version = self.old_schema_version.lstrip("v") iorule.source = f'{member_type} {schema_change.member_name_old}' iorule.target = schema_change.member_name_new - iorule.code = f'std::cout<< "reading {schema_change.member_name_old} with value " << onfile.{schema_change.member_name_old} << std::endl; {iorule.target} = onfile.{schema_change.member_name_old};' # noqa + iorule.code = f'{iorule.target} = onfile.{schema_change.member_name_old};' self.root_schema_iorules.add(iorule) else: raise NotImplementedError("Schema evolution for this type not yet implemented") diff --git a/python/podio_schema_evolution.py b/python/podio_schema_evolution.py index 947a44da9..e2b7a916f 100755 --- a/python/podio_schema_evolution.py +++ b/python/podio_schema_evolution.py @@ -107,6 +107,17 @@ def __init__(self, name, member_name_old, member_name_new): super().__init__(f"'{self.name}': member '{self.member_name_old}' renamed to '{self.member_name_new}'.") +class RootIoRule: + """A placeholder IORule class""" + def __init__(self): + self.sourceClass = None + self.targetClass = None + self.version = None + self.source = None + self.target = None + self.code = None + + def sio_filter(schema_changes): """ Checks what is required/supported for the SIO backend diff --git a/src/SchemaEvolution.cc b/src/SchemaEvolution.cc index 8a42e074a..4576cde1e 100644 --- a/src/SchemaEvolution.cc +++ b/src/SchemaEvolution.cc @@ -68,7 +68,6 @@ void SchemaEvolution::registerEvolutionFunc(const std::string& collType, SchemaV } } - podio::CollectionReadBuffers SchemaEvolution::noOpSchemaEvolution(podio::CollectionReadBuffers&& buffers, SchemaVersionT) { return buffers; From 8b8f99a3b6cfe13f11ce3b6fe9c7476027f351af Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 12 Sep 2023 11:00:47 +0200 Subject: [PATCH 25/31] Rearrange schema evolution tests to not interfere with others Make the schema evolution datamodel the new one and use the original one to write old data. This allows the roundtrip tests to work without additional work because the additional schema evolution code is not used anywhere for that. --- tests/CMakeLists.txt | 2 -- tests/datalayout.yaml | 7 ++++- tests/schema_evolution/CMakeLists.txt | 28 ++++++++++--------- tests/schema_evolution/README.md | 14 +++++----- ...atalayout_old.yaml => datalayout_new.yaml} | 14 +++++----- tests/schema_evolution/read_new_data.h | 5 ++-- tests/schema_evolution/root_io/CMakeLists.txt | 4 +-- .../root_io/read_new_data_root.cpp | 2 +- .../root_io/write_old_data_root.cpp | 2 +- tests/schema_evolution/schema_evolution.yaml | 6 ++-- tests/schema_evolution/write_old_data.h | 2 +- 11 files changed, 46 insertions(+), 40 deletions(-) rename tests/schema_evolution/{datalayout_old.yaml => datalayout_new.yaml} (97%) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 90f1b3f98..50d2846ab 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -10,8 +10,6 @@ SET(podio_PYTHON_DIR ${PROJECT_SOURCE_DIR}/python CACHE PATH "Path to the podio PODIO_GENERATE_DATAMODEL(datamodel datalayout.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} - OLD_DESCRIPTION schema_evolution/datalayout_old.yaml - SCHEMA_EVOLUTION schema_evolution/schema_evolution.yaml ) # Use the cmake building blocks to add the different parts (conditionally) diff --git a/tests/datalayout.yaml b/tests/datalayout.yaml index 3c25cd003..de3ae703c 100755 --- a/tests/datalayout.yaml +++ b/tests/datalayout.yaml @@ -1,5 +1,5 @@ --- -schema_version : 3 +schema_version : 2 options : # should getters / setters be prefixed with get / set? @@ -9,6 +9,11 @@ options : includeSubfolder: True components : + ToBeDroppedStruct: + Description: "A struct to be dropped during schema evolution" + Members: + - int x + SimpleStruct: Members: - int x [mm] diff --git a/tests/schema_evolution/CMakeLists.txt b/tests/schema_evolution/CMakeLists.txt index 3a6ecc659..a86963d82 100644 --- a/tests/schema_evolution/CMakeLists.txt +++ b/tests/schema_evolution/CMakeLists.txt @@ -1,33 +1,35 @@ -PODIO_GENERATE_DATAMODEL(datamodel datalayout_old.yaml headers sources +PODIO_GENERATE_DATAMODEL(datamodel datalayout_new.yaml headers sources IO_BACKEND_HANDLERS ${PODIO_IO_HANDLERS} - OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new + OLD_DESCRIPTION ${PROJECT_SOURCE_DIR}/tests/datalayout.yaml + SCHEMA_EVOLUTION schema_evolution.yaml ) -PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel_v1 "${headers}" "${sources}" - OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old +PODIO_ADD_DATAMODEL_CORE_LIB(TestDataModel_v3 "${headers}" "${sources}" + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new ) -PODIO_ADD_ROOT_IO_DICT(TestDataModel_v1Dict TestDataModel_v1 "${headers}" ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old/src/selection.xml - OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_old +PODIO_ADD_ROOT_IO_DICT(TestDataModel_v3Dict TestDataModel_v3 "${headers}" ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new/src/selection.xml + OUTPUT_FOLDER ${CMAKE_CURRENT_BINARY_DIR}/datamodel_new ) -PODIO_ADD_SIO_IO_BLOCKS(TestDataModel_v1 "${headers}" "${sources}") +PODIO_ADD_SIO_IO_BLOCKS(TestDataModel_v3 "${headers}" "${sources}") #--- Helper function to create a test in an environment that is suitable to -#--- write data in an old schema version -function(PODIO_CREATE_WRITE_OLD_DATA_TEST sourcefile additional_libs) +#--- read data in a new schema version +function(PODIO_CREATE_READ_NEW_DATA_TEST sourcefile additional_libs) string( REPLACE ".cpp" "" name ${sourcefile} ) add_executable( ${name} ${sourcefile} ) add_test(NAME ${name} COMMAND ${name}) - target_link_libraries(${name} PRIVATE TestDataModel_v1 ${additional_libs}) - target_include_directories(${name} PRIVATE ${CMAKE_SOURCE_DIR}/tests/schema_evolution) + target_link_libraries(${name} PRIVATE TestDataModel_v3 ${additional_libs}) + target_include_directories(${name} PRIVATE ${PROJECT_SOURCE_DIR}/tests/schema_evolution) set_property(TEST ${name} PROPERTY ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_BINARY_DIR}/src:${CMAKE_BINARY_DIR}/tests/schema_evolution:$:$<$:$>:$ENV{LD_LIBRARY_PATH} + LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/src:${PROJECT_BINARY_DIR}/tests/schema_evolution:$:$<$:$>:$ENV{LD_LIBRARY_PATH} PODIO_SIO_BLOCK=${CMAKE_CURRENT_BINARY_DIR} - ROOT_INCLUDE_PATH=${CMAKE_BINARY_DIR}/tests/schema_evolution/datamodel_old:${CMAKE_SOURCE_DIR}/include + ROOT_INCLUDE_PATH=${PROJECT_BINARY_DIR}/tests/schema_evolution/datamodel_new:${PROJECT_SOURCE_DIR}/include ) endfunction() diff --git a/tests/schema_evolution/README.md b/tests/schema_evolution/README.md index 4ed849b48..92bd4a667 100644 --- a/tests/schema_evolution/README.md +++ b/tests/schema_evolution/README.md @@ -2,18 +2,18 @@ This folder contains tests for the schema evolution functionality in podio. The functionality is tested by first writing data with an old schema version and then reading in with the current schema version. -[`datalayout_old.yaml`](./datalayout_old.yaml) holds the definition of the old -version, using schema version 1, while the +[`datalayout_new.yaml`](./datalayout_new.yaml) holds the definition of the new +version, using schema version 3, while the [`datalayout.yaml`](../datalayout.yaml) that is also used for the other I/O -tests is used as the current version (schema version 2). +tests is used as the old version (schema version 2). ## Differences between the two schema versions Since it is not immediately visible from the test code this list contains the differences between the two schema versions, and also how this evolution is tested (if it is supported) -| component / datatype | difference from v1 to v2 | purpose of test | tested with | +| component / datatype | difference from v2 to v3 | purpose of test | tested with | |--|--|--|--| -| `SimpleStruct` | no `int y` member in v1 | Addition of new members in components | As member of `ExampleWithArrayComponent` | -| `ExampleHit` | no `double energy` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | -| `ex2::NamespaceStruct` | renaming of `y_old` to `y` | Renaming of member variables | As member of `ex42::ExampleWithNamespace` | +| `SimpleStruct` | no `int t` member in v2 | Addition of new members in components | As member of `ExampleWithArrayComponent` | +| `ExampleHit` | no `double t` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | +| `ex2::NamespaceStruct` | renaming of `y` to `y_new` | Renaming of member variables | As member of `ex42::ExampleWithNamespace` | diff --git a/tests/schema_evolution/datalayout_old.yaml b/tests/schema_evolution/datalayout_new.yaml similarity index 97% rename from tests/schema_evolution/datalayout_old.yaml rename to tests/schema_evolution/datalayout_new.yaml index 965563958..7d55b6e91 100755 --- a/tests/schema_evolution/datalayout_old.yaml +++ b/tests/schema_evolution/datalayout_new.yaml @@ -1,5 +1,5 @@ --- -schema_version : 2 +schema_version : 3 options : # should getters / setters be prefixed with get / set? @@ -10,14 +10,12 @@ options : components : - ToBeDroppedStruct: - Members: - - int x - SimpleStruct: Members: - int x + - int y - int z + - int t - std::array p # can also add c'tors: ExtraCode : @@ -33,7 +31,7 @@ components : ex2::NamespaceStruct: Members: - int x - - int y_old + - int y_new ex2::NamespaceInNamespaceStruct: Members: @@ -52,7 +50,7 @@ components : datatypes : - EventInfoOldName: + EventInfoNewName: Description : "Event info" Author : "B. Hegner" Members : @@ -71,6 +69,8 @@ datatypes : - double x // x-coordinate - double y // y-coordinate - double z // z-coordinate + - double energy // energy + - double t // time ExampleMC : Description : "Example MC-particle" diff --git a/tests/schema_evolution/read_new_data.h b/tests/schema_evolution/read_new_data.h index 59ed31550..afe3c0d2d 100644 --- a/tests/schema_evolution/read_new_data.h +++ b/tests/schema_evolution/read_new_data.h @@ -32,10 +32,11 @@ int readExampleHit(const podio::Frame& event) { const auto& coll = event.get("datatypeMemberAdditionTest"); auto elem = coll[0]; - ASSERT_EQUAL(elem.energy(), 0, "New datatype member variable not 0 initialized"); + ASSERT_EQUAL(elem.t(), 0, "New datatype member variable not 0 initialized"); ASSERT_EQUAL(elem.x(), 1.23, "Member variables unrelated to schema evolution have changed"); ASSERT_EQUAL(elem.y(), 1.23, "Member variables unrelated to schema evolution have changed"); ASSERT_EQUAL(elem.z(), 1.23, "Member variables unrelated to schema evolution have changed"); + ASSERT_EQUAL(elem.energy(), 0, "Member variables unrelated to schema evolution have changed"); ASSERT_EQUAL(elem.cellID(), 0xcaffee, "Member variables unrelated to schema evolution have changed"); return 0; @@ -45,7 +46,7 @@ int readExampleWithNamespace(const podio::Frame& event) { const auto& coll = event.get("componentMemberRenameTest"); auto elem = coll[0]; - ASSERT_EQUAL(elem.y(), 42, "Renamed component member variable does not have the expected value"); + ASSERT_EQUAL(elem.y_new(), 42, "Renamed component member variable does not have the expected value"); ASSERT_EQUAL(elem.x(), 123, "Member variables unrelated to schema evolution have changed"); return 0; diff --git a/tests/schema_evolution/root_io/CMakeLists.txt b/tests/schema_evolution/root_io/CMakeLists.txt index e756cc01a..53e8b451f 100644 --- a/tests/schema_evolution/root_io/CMakeLists.txt +++ b/tests/schema_evolution/root_io/CMakeLists.txt @@ -1,4 +1,4 @@ -PODIO_CREATE_WRITE_OLD_DATA_TEST(write_old_data_root.cpp "TestDataModel_v1Dict;podio::podioRootIO") +CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioRootIO") +PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioRootIO") -CREATE_PODIO_TEST(read_new_data_root.cpp "TestDataModelDict;podioRootIO") set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root) diff --git a/tests/schema_evolution/root_io/read_new_data_root.cpp b/tests/schema_evolution/root_io/read_new_data_root.cpp index 8bf874e78..77f5736a4 100644 --- a/tests/schema_evolution/root_io/read_new_data_root.cpp +++ b/tests/schema_evolution/root_io/read_new_data_root.cpp @@ -1,4 +1,4 @@ -#include "schema_evolution/read_new_data.h" +#include "read_new_data.h" #include "podio/ROOTFrameReader.h" diff --git a/tests/schema_evolution/root_io/write_old_data_root.cpp b/tests/schema_evolution/root_io/write_old_data_root.cpp index c5fc23880..a3456229f 100644 --- a/tests/schema_evolution/root_io/write_old_data_root.cpp +++ b/tests/schema_evolution/root_io/write_old_data_root.cpp @@ -1,4 +1,4 @@ -#include "write_old_data.h" +#include "schema_evolution/write_old_data.h" #include "podio/ROOTFrameWriter.h" diff --git a/tests/schema_evolution/schema_evolution.yaml b/tests/schema_evolution/schema_evolution.yaml index 5f9bf55bc..82b6e5e93 100644 --- a/tests/schema_evolution/schema_evolution.yaml +++ b/tests/schema_evolution/schema_evolution.yaml @@ -6,8 +6,8 @@ evolutions: ex2::NamespaceStruct: member_rename: - - y_old - y + - y_new - EventInfoOldName: - class_renamed_to: EventInfo + EventInfo: + class_renamed_to: EventInfoNewName diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h index 9232e59af..552b6a67c 100644 --- a/tests/schema_evolution/write_old_data.h +++ b/tests/schema_evolution/write_old_data.h @@ -35,7 +35,7 @@ auto writeExampleHit() { auto writeExampleWithNamespace() { ex42::ExampleWithNamespaceCollection coll; auto elem = coll.create(); - elem.y_old(42); + elem.y(42); elem.x(123); return coll; From 36d06e63b6850e34dce9ea16a764e2429f080bb1 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 12 Sep 2023 11:16:43 +0200 Subject: [PATCH 26/31] Add a test for a `float` to `double` migration --- tests/schema_evolution/README.md | 1 + tests/schema_evolution/datalayout_new.yaml | 2 +- tests/schema_evolution/read_new_data.h | 11 +++++++++++ tests/schema_evolution/write_old_data.h | 10 ++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/schema_evolution/README.md b/tests/schema_evolution/README.md index 92bd4a667..1c83fc3f6 100644 --- a/tests/schema_evolution/README.md +++ b/tests/schema_evolution/README.md @@ -17,3 +17,4 @@ tested (if it is supported) | `SimpleStruct` | no `int t` member in v2 | Addition of new members in components | As member of `ExampleWithArrayComponent` | | `ExampleHit` | no `double t` member in v1 | Addition of new members in datatypes | Directly via `ExampleHit` | | `ex2::NamespaceStruct` | renaming of `y` to `y_new` | Renaming of member variables | As member of `ex42::ExampleWithNamespace` | +| `ex42::ExampleWithARelation` | type of `number` member | migration of `float` to `double` | Direcetly via `ex42::ExampleWithARelation` | diff --git a/tests/schema_evolution/datalayout_new.yaml b/tests/schema_evolution/datalayout_new.yaml index 7d55b6e91..e9938fd1c 100755 --- a/tests/schema_evolution/datalayout_new.yaml +++ b/tests/schema_evolution/datalayout_new.yaml @@ -150,7 +150,7 @@ datatypes : Description : "Type with namespace and namespaced relation" Author : "Joschka Lingemann" Members: - - float number // just a number + - double number // just a number OneToOneRelations : - ex42::ExampleWithNamespace ref // a ref in a namespace OneToManyRelations : diff --git a/tests/schema_evolution/read_new_data.h b/tests/schema_evolution/read_new_data.h index afe3c0d2d..cdc3abf46 100644 --- a/tests/schema_evolution/read_new_data.h +++ b/tests/schema_evolution/read_new_data.h @@ -2,6 +2,7 @@ #define PODIO_TESTS_SCHEMAEVOLUTION_READNEWDATA_H // NOLINT(llvm-header-guard): folder structure not suitable #include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithARelationCollection.h" #include "datamodel/ExampleWithArrayComponentCollection.h" #include "datamodel/ExampleWithNamespaceCollection.h" @@ -52,6 +53,15 @@ int readExampleWithNamespace(const podio::Frame& event) { return 0; } +int readExampleWithARelation(const podio::Frame& event) { + const auto& coll = event.get("floatToDoubleMemberTest"); + auto elem = coll[0]; + + ASSERT_EQUAL(elem.number(), (double)3.14f, "Conversion from float to double member does not work as expected"); + + return 0; +} + template int read_new_data(const std::string& filename) { ReaderT reader{}; @@ -63,6 +73,7 @@ int read_new_data(const std::string& filename) { result += readSimpleStruct(event); result += readExampleHit(event); result += readExampleWithNamespace(event); + result += readExampleWithARelation(event); return result; } diff --git a/tests/schema_evolution/write_old_data.h b/tests/schema_evolution/write_old_data.h index 552b6a67c..7676ae34b 100644 --- a/tests/schema_evolution/write_old_data.h +++ b/tests/schema_evolution/write_old_data.h @@ -2,6 +2,7 @@ #define PODIO_TESTS_SCHEMAEVOLUTION_WRITEOLDDATA_H // NOLINT(llvm-header-guard): folder structure not suitable #include "datamodel/ExampleHitCollection.h" +#include "datamodel/ExampleWithARelationCollection.h" #include "datamodel/ExampleWithArrayComponentCollection.h" #include "datamodel/ExampleWithNamespaceCollection.h" @@ -41,12 +42,21 @@ auto writeExampleWithNamespace() { return coll; } +auto writeExamplewWithARelation() { + ex42::ExampleWithARelationCollection coll; + auto elem = coll.create(); + elem.number(3.14f); + + return coll; +} + podio::Frame createFrame() { podio::Frame event; event.put(writeSimpleStruct(), "simpleStructTest"); event.put(writeExampleHit(), "datatypeMemberAdditionTest"); event.put(writeExampleWithNamespace(), "componentMemberRenameTest"); + event.put(writeExamplewWithARelation(), "floatToDoubleMemberTest"); return event; } From 0caa209d936b22a62f42edada463a5d5fa6f82e6 Mon Sep 17 00:00:00 2001 From: Benedikt Hegner Date: Tue, 12 Sep 2023 23:50:31 +0200 Subject: [PATCH 27/31] addressing review comments and code checker --- python/podio_class_generator.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index 2063280c6..aa80c8cb0 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -15,7 +15,7 @@ import jinja2 from podio_schema_evolution import DataModelComparator # dealing with cyclic imports -from podio_schema_evolution import RenamedMember, root_filter +from podio_schema_evolution import RenamedMember, root_filter, RootIoRule from podio.podio_config_reader import PodioConfigReader from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder @@ -77,17 +77,6 @@ class IncludeFrom(IntEnum): EXTERNAL = 2 # include from an upstream datamodel -class RootIoRule: - """A placeholder IORule class""" - def __init__(self): - self.sourceClass = None - self.targetClass = None - self.version = None - self.source = None - self.target = None - self.code = None - - class ClassGenerator: """The entry point for reading a datamodel definition and generating the necessary source code from it.""" @@ -135,7 +124,7 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr def process(self): """Run the actual generation""" - self.preprocess_schema_evolution() + self.process_schema_evolution() for name, component in self.datamodel.components.items(): self._process_component(name, component) @@ -152,7 +141,7 @@ def process(self): self.print_report() - def preprocess_schema_evolution(self): + def process_schema_evolution(self): """Process the schema evolution""" # have to make all necessary comparisons # which are the ones that changed? @@ -181,13 +170,8 @@ def preprocess_schema_evolution(self): # now go through all the io_handlers and see what we have to do if 'ROOT' in self.io_handlers: for item in root_filter(comparator.schema_changes): - schema_evolutions = self.root_schema_dict.get(item.klassname) - if schema_evolutions is None: - schema_evolutions = [] - self.root_schema_dict[item.klassname] = schema_evolutions - - # add whatever is relevant to our ROOT schema evolutions - self.root_schema_dict[item.klassname].append(item) + # add whatever is relevant to our ROOT schema evolution + self.root_schema_dict.setdefault(item.klassname, []).append(item) def print_report(self): """Print a summary report about the generated code""" @@ -336,7 +320,7 @@ def _process_datatype(self, name, definition): if member.array_type in self.root_schema_dict: needs_schema_evolution = True self._replace_component_in_paths(member.array_type, member.array_type + self.old_schema_version, - schema_evolution_datatype['includes_data']) + schema_evolution_datatype['includes_data']) member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version) member.array_type = member.array_type + self.old_schema_version @@ -344,10 +328,8 @@ def _process_datatype(self, name, definition): if member.full_type in self.root_schema_dict: needs_schema_evolution = True # prepare the ROOT I/O rule - print(member.full_type) - dir(member) self._replace_component_in_paths(member.full_type, member.full_type + self.old_schema_version, - schema_evolution_datatype['includes_data']) + schema_evolution_datatype['includes_data']) member.full_type = member.full_type + self.old_schema_version member.bare_type = member.bare_type + self.old_schema_version From 6921613396946d5bd7d2450ebe10a127757092f4 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 8 Sep 2023 09:43:30 +0200 Subject: [PATCH 28/31] Reduce unnecessary template instantiations --- python/templates/Collection.cc.jinja2 | 4 ++++ python/templates/Collection.h.jinja2 | 2 +- python/templates/MutableObject.cc.jinja2 | 4 ++++ python/templates/MutableObject.h.jinja2 | 2 +- python/templates/Object.cc.jinja2 | 4 ++++ python/templates/Object.h.jinja2 | 4 ++-- python/templates/macros/declarations.jinja2 | 2 +- 7 files changed, 17 insertions(+), 5 deletions(-) diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 2021494a4..f8b7a17e8 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -17,6 +17,10 @@ {{ include }} {% endfor %} +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) +#include "nlohmann/json.hpp" +#endif + // standard includes #include #include diff --git a/python/templates/Collection.h.jinja2 b/python/templates/Collection.h.jinja2 index 0e49aec53..616e1bfc8 100644 --- a/python/templates/Collection.h.jinja2 +++ b/python/templates/Collection.h.jinja2 @@ -19,7 +19,7 @@ #include "podio/CollectionIDTable.h" #if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) -#include "nlohmann/json.hpp" +#include "nlohmann/json_fwd.hpp" #endif #include diff --git a/python/templates/MutableObject.cc.jinja2 b/python/templates/MutableObject.cc.jinja2 index 9ed099278..edbca8da9 100644 --- a/python/templates/MutableObject.cc.jinja2 +++ b/python/templates/MutableObject.cc.jinja2 @@ -8,6 +8,10 @@ {{ include }} {% endfor %} +#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#include "nlohmann/json.hpp" +#endif + #include {{ utils.namespace_open(class.namespace) }} diff --git a/python/templates/MutableObject.h.jinja2 b/python/templates/MutableObject.h.jinja2 index ebdac4f53..b669a2b80 100644 --- a/python/templates/MutableObject.h.jinja2 +++ b/python/templates/MutableObject.h.jinja2 @@ -17,7 +17,7 @@ #include #ifdef PODIO_JSON_OUTPUT -#include "nlohmann/json.hpp" +#include "nlohmann/json_fwd.hpp" #endif {{ utils.forward_decls(forward_declarations) }} diff --git a/python/templates/Object.cc.jinja2 b/python/templates/Object.cc.jinja2 index 0d4b07b7e..37414ec52 100644 --- a/python/templates/Object.cc.jinja2 +++ b/python/templates/Object.cc.jinja2 @@ -8,6 +8,10 @@ {{ include }} {% endfor %} +#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#include "nlohmann/json.hpp" +#endif + #include {{ utils.namespace_open(class.namespace) }} diff --git a/python/templates/Object.h.jinja2 b/python/templates/Object.h.jinja2 index 42d737cb5..bec9fc0da 100644 --- a/python/templates/Object.h.jinja2 +++ b/python/templates/Object.h.jinja2 @@ -14,8 +14,8 @@ #include #include -#ifdef PODIO_JSON_OUTPUT -#include "nlohmann/json.hpp" +#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#include "nlohmann/json_fwd.hpp" #endif {{ utils.forward_decls(forward_declarations) }} diff --git a/python/templates/macros/declarations.jinja2 b/python/templates/macros/declarations.jinja2 index af46f499e..a0c976bc4 100644 --- a/python/templates/macros/declarations.jinja2 +++ b/python/templates/macros/declarations.jinja2 @@ -134,7 +134,7 @@ {%- endmacro %} {% macro json_output(type, prefix='') %} -#ifdef PODIO_JSON_OUTPUT +#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) void to_json(nlohmann::json& j, const {{ prefix }}{{ type }}& value); #endif {% endmacro %} From 5cbfec5a42d4bec651fe68f5c5cb8fc53b487880 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 8 Sep 2023 09:56:36 +0200 Subject: [PATCH 29/31] Fix preprocessor directives --- python/templates/MutableObject.cc.jinja2 | 2 +- python/templates/MutableObject.h.jinja2 | 2 +- python/templates/Object.cc.jinja2 | 2 +- python/templates/Object.h.jinja2 | 2 +- python/templates/macros/declarations.jinja2 | 2 +- python/templates/macros/implementations.jinja2 | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/templates/MutableObject.cc.jinja2 b/python/templates/MutableObject.cc.jinja2 index edbca8da9..bb851a48c 100644 --- a/python/templates/MutableObject.cc.jinja2 +++ b/python/templates/MutableObject.cc.jinja2 @@ -8,7 +8,7 @@ {{ include }} {% endfor %} -#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include "nlohmann/json.hpp" #endif diff --git a/python/templates/MutableObject.h.jinja2 b/python/templates/MutableObject.h.jinja2 index b669a2b80..354db473d 100644 --- a/python/templates/MutableObject.h.jinja2 +++ b/python/templates/MutableObject.h.jinja2 @@ -16,7 +16,7 @@ #include #include -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include "nlohmann/json_fwd.hpp" #endif diff --git a/python/templates/Object.cc.jinja2 b/python/templates/Object.cc.jinja2 index 37414ec52..bbb8e8856 100644 --- a/python/templates/Object.cc.jinja2 +++ b/python/templates/Object.cc.jinja2 @@ -8,7 +8,7 @@ {{ include }} {% endfor %} -#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include "nlohmann/json.hpp" #endif diff --git a/python/templates/Object.h.jinja2 b/python/templates/Object.h.jinja2 index bec9fc0da..742f7bbdc 100644 --- a/python/templates/Object.h.jinja2 +++ b/python/templates/Object.h.jinja2 @@ -14,7 +14,7 @@ #include #include -#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) #include "nlohmann/json_fwd.hpp" #endif diff --git a/python/templates/macros/declarations.jinja2 b/python/templates/macros/declarations.jinja2 index a0c976bc4..166b53121 100644 --- a/python/templates/macros/declarations.jinja2 +++ b/python/templates/macros/declarations.jinja2 @@ -134,7 +134,7 @@ {%- endmacro %} {% macro json_output(type, prefix='') %} -#ifdef PODIO_JSON_OUTPUT && !defined(__CLING__) +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) void to_json(nlohmann::json& j, const {{ prefix }}{{ type }}& value); #endif {% endmacro %} diff --git a/python/templates/macros/implementations.jinja2 b/python/templates/macros/implementations.jinja2 index 0ddb29e39..096a93ae5 100644 --- a/python/templates/macros/implementations.jinja2 +++ b/python/templates/macros/implementations.jinja2 @@ -210,7 +210,7 @@ std::ostream& operator<<(std::ostream& o, const {{ type }}& value) { {%- endmacro %} {% macro json_output(class, members, single_relations, multi_relations, vector_members, get_syntax, prefix='') %} -#ifdef PODIO_JSON_OUTPUT +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) void to_json(nlohmann::json& j, const {{ prefix }}{{ class.bare_type }}& value) { j = nlohmann::json{ {% set comma = joiner(",") %} From 10ec62e4398dbeefa1f52ce9989a3c79cadd6cf1 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 8 Sep 2023 14:03:21 +0200 Subject: [PATCH 30/31] Move function implementations into .cc files for Components --- python/podio_class_generator.py | 1 - python/templates/CMakeLists.txt | 1 + python/templates/Component.cc.jinja2 | 39 ++++++++++++++++++++++++++++ python/templates/Component.h.jinja2 | 26 +++---------------- 4 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 python/templates/Component.cc.jinja2 diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index aa80c8cb0..eafcff289 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -233,7 +233,6 @@ def get_fn_format(tmpl): endings = { 'Data': ('h',), - 'Component': ('h',), 'PrintInfo': ('h',) }.get(template_base, ('h', 'cc')) diff --git a/python/templates/CMakeLists.txt b/python/templates/CMakeLists.txt index be5f4b307..76b36354b 100644 --- a/python/templates/CMakeLists.txt +++ b/python/templates/CMakeLists.txt @@ -4,6 +4,7 @@ set(PODIO_TEMPLATES ${CMAKE_CURRENT_LIST_DIR}/CollectionData.cc.jinja2 ${CMAKE_CURRENT_LIST_DIR}/CollectionData.h.jinja2 ${CMAKE_CURRENT_LIST_DIR}/Component.h.jinja2 + ${CMAKE_CURRENT_LIST_DIR}/Component.cc.jinja2 ${CMAKE_CURRENT_LIST_DIR}/Data.h.jinja2 ${CMAKE_CURRENT_LIST_DIR}/Obj.h.jinja2 ${CMAKE_CURRENT_LIST_DIR}/Obj.cc.jinja2 diff --git a/python/templates/Component.cc.jinja2 b/python/templates/Component.cc.jinja2 new file mode 100644 index 000000000..9ab26cd86 --- /dev/null +++ b/python/templates/Component.cc.jinja2 @@ -0,0 +1,39 @@ +{% import "macros/utils.jinja2" as utils %} +// AUTOMATICALLY GENERATED FILE - DO NOT EDIT + +#include "{{ incfolder }}{{ class.bare_type }}.h" + +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) +#include "nlohmann/json.hpp" +#endif + +{{ utils.namespace_open(class.namespace) }} + +std::ostream& operator<<(std::ostream& o, const {{class.full_type}}& value) { +{% for member in Members %} +{% if member.is_array %} + for (int i = 0; i < {{ member.array_size }}; ++i) { + o << value.{{ member.name }}[i] << "|"; + } + o << " "; +{% else %} + o << value.{{ member.name }} << " "; +{% endif %} +{% endfor %} + + return o; +} + +#if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) +void to_json(nlohmann::json& j, const {{ class.bare_type }}& value) { + j = nlohmann::json{ +{% set comma = joiner(",") %} +{% for member in Members %} + {{ comma() }}{"{{ member.name }}", value.{{ member.name }}} +{% endfor %} + }; +} +#endif + +{{ utils.namespace_close(class.namespace) }} + diff --git a/python/templates/Component.h.jinja2 b/python/templates/Component.h.jinja2 index bc775b362..b410c5c8b 100644 --- a/python/templates/Component.h.jinja2 +++ b/python/templates/Component.h.jinja2 @@ -11,7 +11,7 @@ #include #if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) -#include "nlohmann/json.hpp" +#include "nlohmann/json_fwd.hpp" #endif {{ utils.namespace_open(class.namespace) }} @@ -25,30 +25,10 @@ public: {{ utils.if_present(ExtraCode, "declaration") }} }; -inline std::ostream& operator<<(std::ostream& o, const {{class.full_type}}& value) { -{% for member in Members %} -{% if member.is_array %} - for (int i = 0; i < {{ member.array_size }}; ++i) { - o << value.{{ member.name }}[i] << "|"; - } - o << " "; -{% else %} - o << value.{{ member.name }} << " "; -{% endif %} -{% endfor %} - - return o; -} +std::ostream& operator<<(std::ostream& o, const {{class.full_type}}& value); #if defined(PODIO_JSON_OUTPUT) && !defined(__CLING__) -inline void to_json(nlohmann::json& j, const {{ class.bare_type }}& value) { - j = nlohmann::json{ -{% set comma = joiner(",") %} -{% for member in Members %} - {{ comma() }}{"{{ member.name }}", value.{{ member.name }}} -{% endfor %} - }; -} +void to_json(nlohmann::json& j, const {{ class.bare_type }}& value); #endif {{ utils.namespace_close(class.namespace) }} From 1b425c8d8efeb1f9af1ca37cffd807416040a3ec Mon Sep 17 00:00:00 2001 From: hegner Date: Wed, 13 Sep 2023 15:57:32 +0200 Subject: [PATCH 31/31] Update python/podio_class_generator.py Co-authored-by: Thomas Madlener --- python/podio_class_generator.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/podio_class_generator.py b/python/podio_class_generator.py index eafcff289..59918ece9 100755 --- a/python/podio_class_generator.py +++ b/python/podio_class_generator.py @@ -309,11 +309,6 @@ def _process_datatype(self, name, definition): # Compute and prepare the potential schema evolution parts schema_evolution_datatype = copy.deepcopy(datatype) needs_schema_evolution = False - # check whether it has a renamed member - # if name in self.root_schema_dict.keys(): - # for member in schema_evolution_datatype['Members']: - # if - # then check for components with a renamed member for member in schema_evolution_datatype['Members']: if member.is_array: if member.array_type in self.root_schema_dict: