From 36a121e47bafbba5bfa5d7a4cb71fd2cb4c51ce9 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 10 Jan 2025 22:38:08 +0100 Subject: [PATCH 01/10] use pynxtools.nomad.schema.Root --- src/pynxtools/nomad/entrypoints.py | 22 ++++++++++---- src/pynxtools/nomad/parser.py | 49 ++++++++++++++++-------------- src/pynxtools/nomad/schema.py | 2 +- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index dfd957a8f..66b3cb843 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -76,7 +76,7 @@ def load(self): SearchQuantities, ) -schema = "pynxtools.nomad.schema.NeXus" +schema = "pynxtools.nomad.schema.Root" nexus_app = AppEntryPoint( name="NexusApp", @@ -105,17 +105,17 @@ def load(self): Column(quantity=f"entry_type", selected=True), Column( title="definition", - quantity=f"data.*.ENTRY[*].definition__field#{schema}", + quantity=f"data.ENTRY[*].definition__field#{schema}", selected=True, ), Column( title="start_time", - quantity=f"data.*.ENTRY[*].start_time__field#{schema}", + quantity=f"data.ENTRY[*].start_time__field#{schema}", selected=True, ), Column( title="title", - quantity=f"data.*.ENTRY[*].title__field#{schema}", + quantity=f"data.ENTRY[*].title__field#{schema}", selected=True, ), ], @@ -161,8 +161,8 @@ def load(self): "autorange": True, "nbins": 30, "scale": "linear", - "quantity": f"data.Root.datetime#{schema}", - "title": "Procesing Time", + "quantity": f"data.ENTRY.start_time__field#{schema}", + "title": "Start Time", "layout": { "lg": {"minH": 3, "minW": 3, "h": 4, "w": 12, "y": 0, "x": 0} }, @@ -177,6 +177,16 @@ def load(self): "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 12} }, }, + { + "type": "terms", + "show_input": False, + "scale": "linear", + "quantity": f"data.ENTRY.definition__field#{schema}", + "title": "Definition", + "layout": { + "lg": {"minH": 3, "minW": 3, "h": 8, "w": 4, "y": 0, "x": 16} + }, + }, { "type": "periodic_table", "scale": "linear", diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 2fea9afde..00ec67b61 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -60,6 +60,7 @@ def _to_section( nx_def: str, nx_node: Optional[ET.Element], current: MSection, + nx_root, ) -> MSection: """ Args: @@ -105,7 +106,17 @@ def _to_section( new_section = section break - if new_section is None: + if new_section is not None: + return new_section + if current == nx_root: + cls = getattr(nexus_schema, nx_def, None) + sec = cls() + new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] + sec.m_create(new_def_spec.section_def.section_cls) + new_section = sec.m_get_sub_section(new_def_spec, -1) + current.ENTRY.append(new_section) + new_section.__dict__["nx_name"] = hdf_name + else: current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name @@ -194,7 +205,7 @@ def _populate_data( # so values of non-scalar attribute will not end up in metainfo! attr_name = attr_name + "__attribute" - current = _to_section(attr_name, nx_def, nx_attr, current) + current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) try: if nx_root or nx_parent.tag.endswith("group"): @@ -332,12 +343,13 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 if nx_path is None or nx_path == "/": return - current: MSection = _to_section(None, nx_def, None, self.nx_root) + # current: MSection = _to_section(None, nx_def, None, self.nx_root) + current = self.nx_root depth: int = 1 current_hdf_path = "" for name in hdf_path.split("/")[1:]: nx_node = nx_path[depth] if depth < len(nx_path) else name - current = _to_section(name, nx_def, nx_node, current) + current = _to_section(name, nx_def, nx_node, current, self.nx_root) self._collect_class(current) depth += 1 if depth < len(nx_path): @@ -468,7 +480,7 @@ def parse( child_archives: Dict[str, EntryArchive] = None, ) -> None: self.archive = archive - self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member + self.nx_root = nexus_schema.Root() # type: ignore # pylint: disable=no-member self.archive.data = self.nx_root self._logger = logger if logger else get_logger(__name__) @@ -483,25 +495,18 @@ def parse( archive.metadata = EntryMetadata() # Normalise experiment type - app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") - app_def_list = [] - for app_elem in app_defs: - app = app_elem.lstrip() - try: - app_sec = getattr(self.nx_root, app) + # app_defs = str(self.nx_root).split("(")[1].split(")")[0].split(",") + app_def_list = set() + try: + app_entries = getattr(self.nx_root, "ENTRY") + for entry in app_entries: try: - app_entry = getattr(app_sec, "ENTRY") - if len(app_entry) < 1: - raise AttributeError() + app = entry.definition__field + app_def_list.add(rename_nx_for_nomad(app) if app else "Generic") except (AttributeError, TypeError): - app_entry = getattr(app_sec, "entry") - if len(app_entry) < 1: - raise AttributeError() - app_def_list.append( - app if app != rename_nx_for_nomad("NXroot") else "Generic" - ) - except (AttributeError, TypeError): - pass + pass + except (AttributeError, TypeError): + pass if len(app_def_list) == 0: app_def = "Experiment" else: diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index dc19f8f14..ed8c8272f 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -113,7 +113,7 @@ } -class NexusMeasurement(Measurement): +class NexusMeasurement(Measurement, Schema): def normalize(self, archive, logger): try: app_entry = getattr(self, "ENTRY") From c1bc7d57c11344281dcb5fb93f4d517209922e3a Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 17 Jan 2025 22:28:43 +0100 Subject: [PATCH 02/10] not using inner sections --- src/pynxtools/nomad/schema.py | 48 +++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index ed8c8272f..e18c12769 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -633,7 +633,9 @@ def __create_group(xml_node: ET.Element, root_section: Section): nx_type = __rename_nx_for_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type.upper()) - section_name = __rename_nx_for_nomad(nx_name, is_group=True) + section_name = ( + root_section.name + "__" + __rename_nx_for_nomad(nx_name, is_group=True) + ) group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name) __attach_base_section(group_section, root_section, __to_section(nx_type)) @@ -651,8 +653,7 @@ def __create_group(xml_node: ET.Element, root_section: Section): variable=__if_template(nx_name), ) - root_section.inner_section_definitions.append(group_section) - + __section_definitions[section_name] = group_section root_section.sub_sections.append(group_subsection) __create_group(group, group_section) @@ -707,8 +708,13 @@ def __attach_base_section(section: Section, container: Section, default: Section a base-section with a suitable base. """ try: + newdefinitions = {} + for def_name, act_def in container.all_sub_sections.items(): + newdefinitions[def_name] = act_def.sub_section base_section = nexus_resolve_variadic_name( - container.all_inner_section_definitions, section.name, filter=default + newdefinitions, + section.name.split("__")[-1], + filter=default, ) except ValueError: base_section = None @@ -855,7 +861,7 @@ def __add_section_from_nxdl(xml_node: ET.Element) -> Optional[Section]: return None -def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: +def __create_package_from_nxdl_directories() -> Package: """ Creates a metainfo package from the given nexus directory. Will generate the respective metainfo definitions from all the nxdl files in that directory. @@ -875,16 +881,28 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: sections.append(section) sections.sort(key=lambda x: x.name) + nexus_sections = {} + for section_name in ["_Applications", "_BaseSections"]: # , '_InnerSections']: + nexus_sections[section_name] = Section(validate=VALIDATE, name=section_name) + package.section_definitions.append(nexus_sections[section_name]) for section in sections: package.section_definitions.append(section) - if section.nx_category == "application" or ( - section.nx_category == "base" and section.nx_name == "NXroot" - ): - nexus_section.sub_sections.append( + if section.nx_category == "application": + nexus_sections["_Applications"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + elif section.nx_category == "base" and section.nx_name == "NXroot": + nexus_sections["_Applications"].sub_sections.append( SubSection(section_def=section, name=section.name) ) + elif section.nx_category == "base": + nexus_sections["_BaseSections"].sub_sections.append( + SubSection(section_def=section, name=section.name) + ) + for section_name in __section_definitions: + if "__" in section_name: + package.section_definitions.append(__section_definitions[section_name]) - package.section_definitions.append(nexus_section) return package @@ -916,14 +934,6 @@ def init_nexus_metainfo(): if nexus_metainfo_package is not None: return - # We take the application definitions and create a common parent section that allows - # to include nexus in an EntryArchive. - # To be able to register it into data section, it is expected that this section inherits from Schema. - nexus_section = Section( - validate=VALIDATE, name=__GROUPING_NAME, label=__GROUPING_NAME - ) - nexus_section.base_sections = [Schema.m_def] - # try: # load_nexus_schema('') # except Exception: @@ -932,7 +942,7 @@ def init_nexus_metainfo(): # save_nexus_schema('') # except Exception: # pass - nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) + nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, From fc5b95b1976df75849ee97ceec283ad0886f75f4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:09:46 +0100 Subject: [PATCH 03/10] fix for doc links if name contains _ --- src/pynxtools/nomad/schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index e18c12769..742933abf 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,7 +364,9 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return f"{doc_base}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" + return ( + f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" + ) def __to_section(name: str, **kwargs) -> Section: From bfce048e016af1672848f837e53169d66ae8e9aa Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:14:55 +0100 Subject: [PATCH 04/10] fix format --- src/pynxtools/nomad/schema.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 742933abf..66b6a3688 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -364,9 +364,8 @@ def __get_documentation_url( ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return ( - f"{doc_base}/{nx_package}/{anchor_segments[-1].replace("-", "_")}.html#{anchor}" - ) + nx_file = anchor_segments[-1].replace("-", "_") + return f"{doc_base}/{nx_package}/{nx_file}.html#{anchor}" def __to_section(name: str, **kwargs) -> Section: From 274e0d63718f2d9d9f01a26816c4ca7fc8cc9782 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 18:37:49 +0100 Subject: [PATCH 05/10] linting --- src/pynxtools/nomad/schema.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 66b6a3688..8aaf5c659 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -900,9 +900,9 @@ def __create_package_from_nxdl_directories() -> Package: nexus_sections["_BaseSections"].sub_sections.append( SubSection(section_def=section, name=section.name) ) - for section_name in __section_definitions: + for section_name, section in __section_definitions.items(): if "__" in section_name: - package.section_definitions.append(__section_definitions[section_name]) + package.section_definitions.append(section) return package From 27fa71c04d436ff59e09352b9351f73e7ea5eec6 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Tue, 21 Jan 2025 20:43:06 +0100 Subject: [PATCH 06/10] fixing tests --- src/pynxtools/nomad/parser.py | 22 ++++++++++------------ tests/nomad/test_parsing.py | 8 ++++---- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 00ec67b61..dd75c96a0 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -96,19 +96,12 @@ def _to_section( nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True) - # for groups, get the definition from the package - new_def = current.m_def.all_sub_sections[nomad_def_name] - - new_section: MSection = None # type:ignore - - for section in current.m_get_sub_sections(new_def): - if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: - new_section = section - break - - if new_section is not None: - return new_section if current == nx_root: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections["ENTRY"] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section cls = getattr(nexus_schema, nx_def, None) sec = cls() new_def_spec = sec.m_def.all_sub_sections[nomad_def_name] @@ -117,6 +110,11 @@ def _to_section( current.ENTRY.append(new_section) new_section.__dict__["nx_name"] = hdf_name else: + # for groups, get the definition from the package + new_def = current.m_def.all_sub_sections[nomad_def_name] + for section in current.m_get_sub_sections(new_def): + if hdf_name is None or getattr(section, "nx_name", None) == hdf_name: + return section current.m_create(new_def.section_def.section_cls) new_section = current.m_get_sub_section(new_def, -1) new_section.__dict__["nx_name"] = hdf_name diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 8a71f9af3..b1dda4bb3 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -41,7 +41,7 @@ def test_nexus_example(): example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" NexusParser().parse(example_data, archive, get_logger(__name__)) - arpes_obj = getattr(archive.data, rename_nx_for_nomad("NXarpes")) + arpes_obj = archive.data assert arpes_obj.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( "3.27e-10*millibar" @@ -94,9 +94,9 @@ def test_nexus_example_with_renamed_groups(): os.path.dirname(__file__), "../data/nomad/NXlauetof.hdf5" ) NexusParser().parse(lauetof_data, archive, get_logger(__name__)) - lauetof_obj = getattr(archive.data, rename_nx_for_nomad("NXlauetof")) + lauetof_obj = archive.data - assert lauetof_obj.entry.name__group.time_of_flight__field == ureg.Quantity( + assert lauetof_obj.ENTRY[0].name__group.time_of_flight__field == ureg.Quantity( "1.0*second" ) - assert lauetof_obj.entry.sample.name__field == "SAMPLE-CHAR-DATA" + assert lauetof_obj.ENTRY[0].sample.name__field == "SAMPLE-CHAR-DATA" From 8fb49533810a179984acfd7686f2e4546536e648 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 11:08:25 +0100 Subject: [PATCH 07/10] fixing mime-type for WSL --- src/pynxtools/nomad/entrypoints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/entrypoints.py b/src/pynxtools/nomad/entrypoints.py index 66b3cb843..77ad67617 100644 --- a/src/pynxtools/nomad/entrypoints.py +++ b/src/pynxtools/nomad/entrypoints.py @@ -63,7 +63,7 @@ def load(self): name="pynxtools parser", description="A parser for nexus files.", mainfile_name_re=r".*\.nxs", - mainfile_mime_re="application/x-hdf5", + mainfile_mime_re="application/x-hdf*", ) from nomad.config.models.ui import ( From bd22578f7e69662caf7e6538bbe11f1f0c0c91b4 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Fri, 24 Jan 2025 12:38:46 +0100 Subject: [PATCH 08/10] fix for handling raw files in subdirectories --- src/pynxtools/nomad/parser.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index dd75c96a0..6298a0167 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -484,7 +484,13 @@ def parse( self._logger = logger if logger else get_logger(__name__) self._clear_class_refs() - *_, self.nxs_fname = mainfile.rsplit("/", 1) + mf = mainfile.split("/") + # if filename does not follow the pattern + # .volumes/fs/////[subdirs?]/ + if len(mf) < 7: + self.nxs_fname = mainfile + else: + self.nxs_fname = "/".join(mf[6:]) nexus_helper = HandleNexus(logger, mainfile) nexus_helper.process_nexus_master_file(self.__nexus_populate) From 8f1a0b4fbc4478a98ce2c4e7691cb167f9db0ce2 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Wed, 29 Jan 2025 18:39:38 +0100 Subject: [PATCH 09/10] use references in steps, and results in NexusMeasurement --- src/pynxtools/nomad/schema.py | 65 ++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 8aaf5c659..98dbb6c82 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -33,8 +33,9 @@ try: from nomad import utils from nomad.datamodel import EntryArchive, EntryMetadata - from nomad.datamodel.data import EntryData, Schema + from nomad.datamodel.data import ArchiveSection, EntryData, Schema from nomad.datamodel.metainfo import basesections + from nomad.datamodel.metainfo.annotations import ELNAnnotation from nomad.datamodel.metainfo.basesections import ( ActivityResult, ActivityStep, @@ -101,14 +102,37 @@ __logger = get_logger(__name__) + +class NexusActivityStep(ActivityStep): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Step.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + +class NexusActivityResult(ActivityResult): + reference = Quantity( + type=ArchiveSection, + description="A reference to a NeXus Activity Result.", + a_eln=ELNAnnotation( + component="ReferenceEditQuantity", + label="section reference", + ), + ) + + __BASESECTIONS_MAP: Dict[str, Any] = { "NXfabrication": [basesections.Instrument], "NXsample": [CompositeSystem], "NXsample_component": [Component], "NXidentifier": [EntityReference], - "NXentry": [ActivityStep], - "NXprocess": [ActivityStep], - "NXdata": [ActivityResult], + "NXentry": [NexusActivityStep], + "NXprocess": [NexusActivityStep], + "NXdata": [NexusActivityResult], # "object": BaseSection, } @@ -121,23 +145,21 @@ def normalize(self, archive, logger): raise AttributeError() self.steps = [] for entry in app_entry: - sec_c = entry.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=entry.name, reference=entry) + self.steps.append(ref) for sec in entry.m_all_contents(): if isinstance(sec, ActivityStep): - sec_c = sec.m_copy() - self.steps.append(sec_c) + ref = NexusActivityStep(name=sec.name, reference=sec) + self.steps.append(ref) elif isinstance(sec, basesections.Instrument): - ref = InstrumentReference(name=sec.name) - ref.reference = sec + ref = InstrumentReference(name=sec.name, reference=sec) self.instruments.append(ref) elif isinstance(sec, CompositeSystem): - ref = CompositeSystemReference(name=sec.name) - ref.reference = sec + ref = CompositeSystemReference(name=sec.name, reference=sec) self.samples.append(ref) elif isinstance(sec, ActivityResult): - sec_c = sec.m_copy() - self.results.append(sec_c) + ref = NexusActivityResult(name=sec.name, reference=sec) + self.results.append(ref) if self.m_def.name == "Root": self.method = "Generic Experiment" else: @@ -158,7 +180,7 @@ def normalize(self, archive, logger): act_array = archive.workflow2.tasks existing_items = {(task.name, task.section) for task in act_array} new_items = [ - item.to_task() + item.reference.to_task() for item in self.steps if (item.name, item) not in existing_items ] @@ -177,9 +199,9 @@ def normalize(self, archive, logger): act_array = archive.workflow2.outputs existing_items = {(link.name, link.section) for link in act_array} new_items = [ - Link(name=item.name, section=item) + Link(name=item.name, section=item.reference) for item in self.results - if (item.name, item) not in existing_items + if (item.name, item.reference) not in existing_items ] act_array.extend(new_items) @@ -945,6 +967,8 @@ def init_nexus_metainfo(): # pass nexus_metainfo_package = __create_package_from_nxdl_directories() nexus_metainfo_package.section_definitions.append(NexusMeasurement.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityStep.m_def) + nexus_metainfo_package.section_definitions.append(NexusActivityResult.m_def) # We need to initialize the metainfo definitions. This is usually done automatically, # when the metainfo schema is defined though MSection Python classes. @@ -983,6 +1007,13 @@ def normalize_fabrication(self, archive, logger): current_cls = __section_definitions[ __rename_nx_for_nomad("NXfabrication") ].section_cls + self.name = ( + self.__dict__["nx_name"] + + " (" + + ((self.vendor__field + " / ") if self.vendor__field else "") + + (self.model__field if self.model__field else "") + + ")" + ) super(current_cls, self).normalize(archive, logger) From de2e94a90d228da91e923d4c537ee4ac68807b09 Mon Sep 17 00:00:00 2001 From: sanbrock Date: Thu, 30 Jan 2025 20:48:42 +0100 Subject: [PATCH 10/10] make nexus attributes searchable by importing them to NOMAD as Quantities --- src/pynxtools/nomad/parser.py | 39 ++++++++++++++++++++++------------- src/pynxtools/nomad/schema.py | 24 +++++++++++++++------ src/pynxtools/nomad/utils.py | 3 +-- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index 6298a0167..138db791e 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -202,12 +202,22 @@ def _populate_data( attr_value = attr_value[0] # so values of non-scalar attribute will not end up in metainfo! - attr_name = attr_name + "__attribute" current = _to_section(attr_name, nx_def, nx_attr, current, self.nx_root) + attribute = attr_value + # TODO: get unit from attribute _units try: if nx_root or nx_parent.tag.endswith("group"): - current.m_set_section_attribute(attr_name, attr_value) + attribute_name = "___" + attr_name + metainfo_def = resolve_variadic_name( + current.m_def.all_properties, attribute_name + ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap(attribute, attribute_name) + current.m_set(metainfo_def, attribute) + # if attributes are set before setting the quantity, a bug can cause them being set under a wrong variadic name + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) else: parent_html_name = nx_path[-2].get("name") @@ -216,25 +226,26 @@ def _populate_data( metainfo_def = None try: + attribute_name = parent_html_name + "___" + attr_name metainfo_def = resolve_variadic_name( - current.m_def.all_properties, parent_field_name + current.m_def.all_properties, attribute_name + ) + data_instance_name = ( + hdf_node.name.split("/")[-1] + "___" + attr_name ) + if metainfo_def.use_full_storage: + attribute = MQuantity.wrap( + attribute, data_instance_name + ) except ValueError as exc: self._logger.warning( - f"{current.m_def} has no suitable property for {parent_field_name}", + f"{current.m_def} has no suitable property for {parent_field_name} and {attr_name} as {attribute_name}", target_name=attr_name, exc_info=exc, ) - if parent_field_name in current.__dict__: - quantity = current.__dict__[parent_field_name] - if isinstance(quantity, dict): - quantity = quantity[parent_instance_name] - else: - quantity = None - raise Warning( - "setting attribute attempt before creating quantity" - ) - quantity.m_set_attribute(attr_name, attr_value) + current.m_set(metainfo_def, attribute) + attribute.m_set_attribute("m_nx_data_path", hdf_node.name) + attribute.m_set_attribute("m_nx_data_file", self.nxs_fname) except Exception as e: self._logger.warning( "error while setting attribute", diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index 98dbb6c82..e2e3f1111 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -468,16 +468,19 @@ def __add_common_properties(xml_node: ET.Element, definition: Definition): definition.more["nx_optional"] = __if_base(xml_node) -def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantity]): +def __create_attributes( + xml_node: ET.Element, definition: Union[Section, Quantity], field: Quantity = None +): """ Add all attributes in the given nexus XML node to the given - Quantity or SubSection using the Attribute class (new mechanism). + Quantity or SubSection using a specially named Quantity class. todo: account for more attributes of attribute, e.g., default, minOccurs """ for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES): name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True) + shape: list = [] nx_enum = __get_enumeration(attribute) if nx_enum: nx_type = nx_enum @@ -496,8 +499,17 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit else: nx_shape = [] - m_attribute = Attribute( - name=name, variable=__if_template(name), shape=nx_shape, type=nx_type + a_name = (field.more["nx_name"] if field else "") + "___" + name + m_attribute = Quantity( + name=a_name, + variable=__if_template(name) + or (__if_template(field.more["nx_name"]) if field else False), + shape=shape, + type=nx_type, + flexible_unit=True, + ) + m_attribute.more.update( + dict(nx_kind="attribute") # , nx_type=nx_type, nx_shape=nx_shape) ) for name, value in attribute.items(): @@ -505,7 +517,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit __add_common_properties(attribute, m_attribute) - definition.attributes.append(m_attribute) + definition.quantities.append(m_attribute) def __add_additional_attributes(definition: Definition): @@ -637,7 +649,7 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity: container.quantities.append(value_quantity) - __create_attributes(xml_node, value_quantity) + __create_attributes(xml_node, container, value_quantity) return value_quantity diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py index 794a94e60..30916ec1d 100644 --- a/src/pynxtools/nomad/utils.py +++ b/src/pynxtools/nomad/utils.py @@ -79,6 +79,5 @@ def __rename_nx_for_nomad( elif is_field: name += "__field" elif is_attribute: - name += "__attribute" - + pass return name