Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Schema evolution based on ROOT and Reflex dictionaries #472

Merged
merged 34 commits into from
Sep 13, 2023
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
0b57a4d
Remove unused fields
tmadlener Apr 3, 2023
074a0e5
Add SchemaEvolution singleton to hold evolution functions
tmadlener May 3, 2023
df470ed
Inject type information into collection buffers
tmadlener May 8, 2023
7fee6f2
Inject current schema version into buffers from buffer factory
tmadlener May 9, 2023
2ba52f8
[wip] Start populating SchemaEvolution registry
tmadlener May 8, 2023
8942b3d
[wip] Split registration into two steps
tmadlener May 9, 2023
60f24d3
[wip] Require registration of each evolution function
tmadlener May 11, 2023
64d148c
[clang-tidy] Mark inputs as const& for now
tmadlener Jun 14, 2023
9e006fd
Create schema_evolution test subdirectory and build old datamodel
tmadlener Jun 15, 2023
14c2010
Add first simple tests for "trivial" schema evolution
tmadlener Jun 15, 2023
3ccd157
Fix test environment and typo
tmadlener Jun 15, 2023
557f347
Add failing test for renamed member variables
tmadlener Jun 20, 2023
77fec67
Merge branch 'master' into schema-evol-library
hegner Jun 27, 2023
b65c6ee
move Collection::createBuffers template into macro
hegner Jul 6, 2023
2807c55
creating components and datatypes for explicit schema evolution
hegner Jul 7, 2023
20e30d0
add more schema evolution code generation
hegner Jul 10, 2023
5b41643
bump of schema version for testing. version 1 is already reserved for…
hegner Sep 1, 2023
afa0e41
add missing schema evolution pieces; prepare for ioread rules in refl…
hegner Sep 1, 2023
39a42b6
add code generation for reflex schema evolution
hegner Sep 1, 2023
09455c0
Merge branch 'master' into schema-reflex
hegner Sep 11, 2023
28ca566
Update SchemaEvolution.cc
hegner Sep 11, 2023
fab33e8
Update Collection.cc.jinja2
hegner Sep 11, 2023
178b520
disable currently unused schema evolution parts
hegner Sep 11, 2023
905b473
address static code checker warnings
hegner Sep 11, 2023
6b8fe15
Fix bug re-introduced in merging master
tmadlener Sep 11, 2023
51462d9
addressing PR comments
hegner Sep 11, 2023
8b8f99a
Rearrange schema evolution tests to not interfere with others
tmadlener Sep 12, 2023
36d06e6
Add a test for a `float` to `double` migration
tmadlener Sep 12, 2023
0caa209
addressing review comments and code checker
hegner Sep 12, 2023
6921613
Reduce unnecessary template instantiations
tmadlener Sep 8, 2023
5cbfec5
Fix preprocessor directives
tmadlener Sep 8, 2023
10ec62e
Move function implementations into .cc files for Components
tmadlener Sep 8, 2023
f632556
Merge branch 'master' into schema-reflex
tmadlener Sep 13, 2023
1b425c8
Update python/podio_class_generator.py
hegner Sep 13, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/scripts/pylint.rc
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ max-statements=50
max-parents=7

# Maximum number of attributes for a class (see R0902).
max-attributes=25
max-attributes=30

# Minimum number of public methods for a class (see R0903).
min-public-methods=0
Expand Down
131 changes: 122 additions & 9 deletions python/podio_class_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
"""Podio class generator script"""

import copy
import os
import sys
import subprocess
Expand All @@ -14,6 +15,7 @@
import jinja2

from podio_schema_evolution import DataModelComparator # dealing with cyclic imports
from podio_schema_evolution import RenamedMember, root_filter, RootIoRule
from podio.podio_config_reader import PodioConfigReader
from podio.generator_utils import DataType, DefinitionError, DataModelJSONEncoder

Expand Down Expand Up @@ -89,9 +91,16 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr
# schema evolution specific code
self.old_yamlfile = old_description
self.evolution_file = evolution_file
self.old_schema_version = None
self.old_schema_version_int = None
self.old_datamodel = None
self.old_datamodels_components = set()
self.old_datamodels_datatypes = set()
self.root_schema_dict = {} # containing the root relevant schema evolution per datatype
# information to update the selection.xml
self.root_schema_component_names = set()
self.root_schema_datatype_names = set()
self.root_schema_iorules = set()

try:
self.datamodel = PodioConfigReader.read(yamlfile, package_name, upstream_edm)
Expand All @@ -115,19 +124,20 @@ def __init__(self, yamlfile, install_dir, package_name, io_handlers, verbose, dr

def process(self):
"""Run the actual generation"""
self.process_schema_evolution()

for name, component in self.datamodel.components.items():
self._process_component(name, component)

for name, datatype in self.datamodel.datatypes.items():
self._process_datatype(name, datatype)

self._write_edm_def_file()

if 'ROOT' in self.io_handlers:
self.prepare_iorules()
self._create_selection_xml()

self._write_cmake_lists_file()
self.process_schema_evolution()

self.print_report()

Expand All @@ -141,7 +151,8 @@ def process_schema_evolution(self):
evolution_file=self.evolution_file)
comparator.read()
comparator.compare()

self.old_schema_version = f"v{comparator.datamodel_old.schema_version}"
self.old_schema_version_int = comparator.datamodel_old.schema_version
# some sanity checks
if len(comparator.errors) > 0:
print(f"The given datamodels '{self.yamlfile}' and '{self.old_yamlfile}' \
Expand All @@ -156,6 +167,12 @@ def process_schema_evolution(self):
print(warning)
sys.exit(-1)

# now go through all the io_handlers and see what we have to do
if 'ROOT' in self.io_handlers:
for item in root_filter(comparator.schema_changes):
# add whatever is relevant to our ROOT schema evolution
self.root_schema_dict.setdefault(item.klassname, []).append(item)

def print_report(self):
"""Print a summary report about the generated code"""
if not self.verbose:
Expand All @@ -170,8 +187,15 @@ def print_report(self):
print(summaryline)
print()

def _eval_template(self, template, data):
def _eval_template(self, template, data, old_schema_data=None):
"""Fill the specified template"""
# merge the info of data and the old schema into a single dict
if old_schema_data:
data['OneToOneRelations_old'] = old_schema_data['OneToOneRelations']
data['OneToManyRelations_old'] = old_schema_data['OneToManyRelations']
data['VectorMembers_old'] = old_schema_data['VectorMembers']
data['old_schema_version'] = self.old_schema_version_int

return self.env.get_template(template).render(data)

def _write_file(self, name, content):
Expand Down Expand Up @@ -220,7 +244,7 @@ def get_fn_format(tmpl):

return fn_templates

def _fill_templates(self, template_base, data):
def _fill_templates(self, template_base, data, old_schema_data=None):
"""Fill the template and write the results to file"""
# Update the passed data with some global things that are the same for all
# files
Expand All @@ -229,7 +253,7 @@ def _fill_templates(self, template_base, data):
data['incfolder'] = self.incfolder

for filename, template in self._get_filenames_templates(template_base, data['class'].bare_type):
self._write_file(filename, self._eval_template(template, data))
self._write_file(filename, self._eval_template(template, data, old_schema_data))

def _process_component(self, name, component):
"""Process one component"""
Expand All @@ -247,12 +271,76 @@ def _process_component(self, name, component):

component['includes'] = self._sort_includes(includes)
component['class'] = DataType(name)

self._fill_templates('Component', component)

# Add potentially older schema for schema evolution
# based on ROOT capabilities for now
if name in self.root_schema_dict:
schema_evolutions = self.root_schema_dict[name]
component = copy.deepcopy(component)
for schema_evolution in schema_evolutions:
if isinstance(schema_evolution, RenamedMember):
for member in component['Members']:
if member.name == schema_evolution.member_name_new:
member.name = schema_evolution.member_name_old
component['class'] = DataType(name + self.old_schema_version)
else:
raise NotImplementedError
self._fill_templates('Component', component)
self.root_schema_component_names.add(name + self.old_schema_version)

@staticmethod
def _replace_component_in_paths(oldname, newname, paths):
"""Replace component name by another one in existing paths"""
# strip the namespace
shortoldname = oldname.split("::")[-1]
shortnewname = newname.split("::")[-1]
# and do the replace in place
for index, thePath in enumerate(paths):
if shortoldname in thePath:
newPath = thePath.replace(shortoldname, shortnewname)
paths[index] = newPath

def _process_datatype(self, name, definition):
"""Process one datatype"""
datatype = self._preprocess_datatype(name, definition)

# ROOT schema evolution preparation
# Compute and prepare the potential schema evolution parts
schema_evolution_datatype = copy.deepcopy(datatype)
needs_schema_evolution = False
# check whether it has a renamed member
# if name in self.root_schema_dict.keys():
# for member in schema_evolution_datatype['Members']:
# if
# then check for components with a renamed member
hegner marked this conversation as resolved.
Show resolved Hide resolved
for member in schema_evolution_datatype['Members']:
if member.is_array:
if member.array_type in self.root_schema_dict:
needs_schema_evolution = True
self._replace_component_in_paths(member.array_type, member.array_type + self.old_schema_version,
schema_evolution_datatype['includes_data'])
member.full_type = member.full_type.replace(member.array_type, member.array_type + self.old_schema_version)
member.array_type = member.array_type + self.old_schema_version

else:
if member.full_type in self.root_schema_dict:
needs_schema_evolution = True
# prepare the ROOT I/O rule
self._replace_component_in_paths(member.full_type, member.full_type + self.old_schema_version,
schema_evolution_datatype['includes_data'])
member.full_type = member.full_type + self.old_schema_version
member.bare_type = member.bare_type + self.old_schema_version

if needs_schema_evolution:
print(f" Preparing explicit schema evolution for {name}")
schema_evolution_datatype['class'].bare_type = schema_evolution_datatype['class'].bare_type + self.old_schema_version # noqa
self._fill_templates('Data', schema_evolution_datatype)
self.root_schema_datatype_names.add(name + self.old_schema_version)
self._fill_templates('Collection', datatype, schema_evolution_datatype)
else:
self._fill_templates('Collection', datatype)

self._fill_templates('Data', datatype)
self._fill_templates('Object', datatype)
self._fill_templates('MutableObject', datatype)
Expand All @@ -263,6 +351,28 @@ def _process_datatype(self, name, definition):
if 'SIO' in self.io_handlers:
self._fill_templates('SIOBlock', datatype)

def prepare_iorules(self):
  """Prepare the IORules to be put in the Reflex dictionary.

  For every RenamedMember recorded in self.root_schema_dict one RootIoRule is
  created that reads the member under its old name from file and assigns it
  to the member with the new name. The rules are collected in
  self.root_schema_iorules.

  Raises:
    NotImplementedError: if a schema change is not a RenamedMember.
    ValueError: if the renamed member is not part of the current definition
      of the component, so that its type cannot be determined.
  """
  for type_name, schema_changes in self.root_schema_dict.items():
    for schema_change in schema_changes:
      if not isinstance(schema_change, RenamedMember):
        raise NotImplementedError(
            f"ROOT schema evolution for changes of kind '{type(schema_change).__name__}' "
            f"(requested for '{type_name}') is not yet implemented")

      # find out the type of the renamed member. Reset the type for every
      # schema change, so that a failed lookup cannot silently pick up the
      # type that was found for a previous change
      member_type = None
      component = self.datamodel.components[type_name]
      for member in component["Members"]:
        if member.name == schema_change.member_name_new:
          member_type = member.full_type

      if member_type is None:
        raise ValueError(
            f"Cannot create I/O rule for '{type_name}': renamed member "
            f"'{schema_change.member_name_new}' not found in the current definition")

      iorule = RootIoRule()
      iorule.sourceClass = type_name
      iorule.targetClass = type_name
      # old_schema_version has the form "v<N>", the rule only needs <N>
      iorule.version = self.old_schema_version.lstrip("v")
      iorule.source = f'{member_type} {schema_change.member_name_old}'
      iorule.target = schema_change.member_name_new
      iorule.code = f'{iorule.target} = onfile.{schema_change.member_name_old};'
      self.root_schema_iorules.add(iorule)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is potentially confusing, without further information on what "type" is.


def _preprocess_for_obj(self, datatype):
"""Do the preprocessing that is necessary for the Obj classes"""
fwd_declarations = defaultdict(list)
Expand Down Expand Up @@ -483,10 +593,13 @@ def _needs_include(self, classname) -> IncludeFrom:

def _create_selection_xml(self):
"""Create the selection xml that is necessary for ROOT I/O"""
data = {'components': [DataType(c) for c in self.datamodel.components],
data = {'version': self.datamodel.schema_version,
'components': [DataType(c) for c in self.datamodel.components],
'datatypes': [DataType(d) for d in self.datamodel.datatypes],
'old_schema_components': [DataType(d) for d in
self.old_datamodels_datatypes | self.old_datamodels_components]}
self.root_schema_datatype_names | self.root_schema_component_names], # noqa
'iorules': self.root_schema_iorules}

self._write_file('selection.xml', self._eval_template('selection.xml.jinja2', data))

def _build_include(self, member):
Expand Down
13 changes: 12 additions & 1 deletion python/podio_schema_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,17 @@ def __init__(self, name, member_name_old, member_name_new):
super().__init__(f"'{self.name}': member '{self.member_name_old}' renamed to '{self.member_name_new}'.")


class RootIoRule:
  """A placeholder IORule class

  Plain attribute container; all fields start out as None and are expected to
  be filled by whoever creates the rule.
  """
  def __init__(self):
    # classes the rule reads from / writes to
    self.sourceClass = None
    self.targetClass = None
    # schema version the rule applies to
    self.version = None
    # declaration of the on-file member and name of the in-memory member
    self.source = None
    self.target = None
    # code snippet that performs the conversion
    self.code = None


def sio_filter(schema_changes):
"""
Checks what is required/supported for the SIO backend
Expand Down Expand Up @@ -225,7 +236,7 @@ def heuristics_members(self, added_members, dropped_members, schema_changes):
"""make analysis of member changes in a given data type """
for dropped_member in dropped_members:
added_members_in_definition = [member for member in added_members if
dropped_member.definition_name == member.definition_name]
dropped_member.definition_name == member.definition_name]
for added_member in added_members_in_definition:
if added_member.member.full_type == dropped_member.member.full_type:
# this is a rename candidate. So let's see whether it has been explicitly declared by the user
Expand Down
47 changes: 46 additions & 1 deletion python/templates/Collection.cc.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
#include "{{ incfolder }}{{ class.bare_type }}Collection.h"
#include "{{ incfolder }}DatamodelDefinition.h"

{% if old_schema_version is defined %}
#include "{{ incfolder }}{{ class.bare_type }}v{{ old_schema_version }}Data.h"
{% endif %}

{% for include in includes_coll_cc %}
{{ include }}
{% endfor %}
Expand Down Expand Up @@ -173,7 +177,18 @@ podio::SchemaVersionT {{ collection_type }}::getSchemaVersion() const {
return {{ package_name }}::meta::schemaVersion;
}

{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, 1) }}
// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, -1) }}

{#
// SCHEMA EVOLUTION: Not yet required with only ROOT backend
// {% if old_schema_version is defined %}
// {{ macros.createBuffers(class, package_name, collection_type, OneToManyRelations_old, OneToOneRelations_old, VectorMembers_old, old_schema_version) }}
// {% endif %}
#}

// The usual trick with an IIFE and a static variable inside a function and then
// making sure to call that function during shared library loading
Expand All @@ -182,6 +197,36 @@ bool registerCollection() {
auto& factory = podio::CollectionBufferFactory::mutInstance();
factory.registerCreationFunc("{{ class.full_type }}Collection", {{ package_name }}::meta::schemaVersion, createBuffers);

// Make the SchemaEvolution aware of the current version by
// registering a no-op function for this and all preceding versions.
// These will be overridden whenever an explicit action is required
for (unsigned int schemaVersion=1; schemaVersion< {{ package_name }}::meta::schemaVersion+1; ++schemaVersion) {
podio::SchemaEvolution::mutInstance().registerEvolutionFunc(
"{{ class.full_type }}Collection",
schemaVersion,
{{ package_name }}::meta::schemaVersion,
podio::SchemaEvolution::noOpSchemaEvolution,
podio::SchemaEvolution::Priority::AutoGenerated
);
}

{% if old_schema_version is defined %}
// register a buffer creation function for the schema evolution buffer
// SCHEMA EVOLUTION: Not yet required with only ROOT backend
// factory.registerCreationFunc("{{ class.full_type }}Collection", {{ old_schema_version }}, createBuffersV{{old_schema_version}}); //TODO
tmadlener marked this conversation as resolved.
Show resolved Hide resolved

//Make the SchemaEvolution aware of any other non-trivial conversion
podio::SchemaEvolution::mutInstance().registerEvolutionFunc(
"{{ class.full_type }}Collection",
{{ old_schema_version }},
{{ package_name }}::meta::schemaVersion,
podio::SchemaEvolution::noOpSchemaEvolution,
podio::SchemaEvolution::Priority::AutoGenerated
);


{% endif %}

return true;
}();
return reg;
Expand Down
5 changes: 5 additions & 0 deletions python/templates/CollectionData.h.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
{{ include }}
{% endfor %}

// schema evolution specific includes
{% if schema_evolution_data is defined %}
#include "{{ incfolder }}{{ schema_evolution_data }}Data"
{% endif %}

// podio specific includes
#include "podio/CollectionBuffers.h"
#include "podio/ICollectionProvider.h"
Expand Down
14 changes: 9 additions & 5 deletions python/templates/macros/collections.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,20 @@ void {{ class.bare_type }}Collection::print(std::ostream& os, bool flush) const

{% macro createBuffers(class, package_name, collection_type, OneToManyRelations, OneToOneRelations, VectorMembers, schemaVersion) %}

// anonymous namespace for registration with the CollectionBufferFactory. This
// ensures that we don't have to make up arbitrary namespace names here, since
// none of this is publicly visible
namespace {
{% if schemaVersion == -1 %}
podio::CollectionReadBuffers createBuffers(bool isSubset) {
{% else %}
podio::CollectionReadBuffers createBuffersV{{ schemaVersion }}(bool isSubset) {
{% endif %}
auto readBuffers = podio::CollectionReadBuffers{};
readBuffers.type = "{{ class.full_type }}Collection";
{% if schemaVersion == -1 %}
readBuffers.schemaVersion = {{ package_name }}::meta::schemaVersion;
readBuffers.data = isSubset ? nullptr : new {{ class.bare_type }}DataContainer;

{% else %}
readBuffers.schemaVersion = {{ schemaVersion }};
readBuffers.data = isSubset ? nullptr : new std::vector<{{ class.bare_type }}v{{ schemaVersion }}Data>;
{% endif %}
// The number of ObjectID vectors is either 1 or the sum of OneToMany and
// OneToOne relations
const auto nRefs = isSubset ? 1 : {{ OneToManyRelations | length }} + {{ OneToOneRelations | length }};
Expand Down
Loading