diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 21f6a45..6fd15f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy workflow_dispatch: env: - POETRY_VERSION: 1.8.3 + POETRY_VERSION: 1.8.4 REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 92b7e4e..f2d9399 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,11 +1,11 @@ --- repos: - repo: https://github.com/python-poetry/poetry - rev: 1.8.3 + rev: 1.8.4 hooks: - id: poetry-check - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-docstring-first - id: check-json @@ -16,13 +16,18 @@ repos: - id: name-tests-test - id: pretty-format-json args: [--autofix, --no-ensure-ascii] + exclude: \.ipynb$ - id: trailing-whitespace + - repo: https://github.com/srstevenson/nb-clean + rev: 4.0.1 + hooks: + - id: nb-clean - repo: https://github.com/facebook/usort rev: v1.0.8 hooks: - id: usort - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.8.0 + rev: 24.10.0 hooks: - id: black args: [--preview] @@ -33,7 +38,7 @@ repos: args: [--exit-zero] verbose: true additional_dependencies: - - flake8-bugbear == 24.4.26 + - flake8-bugbear == 24.10.31 - repo: https://github.com/adrienverge/yamllint rev: v1.35.1 hooks: diff --git a/examples/demo.ipynb b/examples/demo.ipynb index 033e692..9d49da6 100644 --- a/examples/demo.ipynb +++ b/examples/demo.ipynb @@ -1,98 +1,134 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "from pyambit.datamodel import Substances, Study \n", - "import nexusformat.nexus.tree as nx\n", - "import os.path\n", - "import tempfile\n", - "# to_nexus is not added without this import\n", - "from pyambit import nexus_writer\n", - "import json" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def query(url = \"https://apps.ideaconsult.net/gracious/substance/\" ,params = {\"max\" : 1}):\n", - " substances = None\n", - " headers = {'Accept': 'application/json'}\n", - " result = requests.get(url,params=params,headers=headers)\n", - " if result.status_code==200:\n", - " response = result.json()\n", - " substances = Substances.model_construct(**response)\n", - " for substance in substances.substance:\n", - " url_study = \"{}/study\".format(substance.URI)\n", - " study = requests.get(url_study,headers=headers)\n", - " if study.status_code==200:\n", - " response_study = study.json()\n", - " substance.study = Study.model_construct(**response_study).study\n", - "\n", - " return substances\n", - "\n", - "def write_studies_nexus(substances):\n", - " for substance in substances.substance:\n", - " for study in substance.study:\n", - " file = os.path.join(tempfile.gettempdir(), \"study_{}.nxs\".format(study.uuid))\n", - " nxroot = nx.NXroot()\n", - " try:\n", - " study.to_nexus(nxroot)\n", - " nxroot.save(file, mode=\"w\")\n", - " except Exception as err:\n", - " #print(\"error\",file,str(err))\n", - " print(file)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "try:\n", - " substances = query(params = {\"max\" : 10}) \n", - " _json = substances.model_dump(exclude_none=True)\n", - " new_substances = Substances.model_construct(**_json)\n", - " #test roundtrip\n", - " assert substances 
== new_substances\n", - "\n", - " file = os.path.join(tempfile.gettempdir(), \"remote.json\")\n", - " print(file)\n", - " with open(file, 'w', encoding='utf-8') as file:\n", - " file.write(substances.model_dump_json(exclude_none=True))\n", - " write_studies_nexus(substances)\n", - "except Exception as x:\n", - " print(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "name": "python", - "version": "3.12.5" - } + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from pyambit.datamodel import Substances, Study \n", + "import nexusformat.nexus.tree as nx\n", + "import os.path\n", + "import tempfile\n", + "# to_nexus is not added without this import\n", + "from pyambit import nexus_writer\n", + "import json\n", + "from IPython.display import display, HTML" + ] }, - "nbformat": 4, - "nbformat_minor": 2 + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def query(url = \"https://apps.ideaconsult.net/gracious/substance/\" ,params = {\"max\" : 1}):\n", + " substances = None\n", + " headers = {'Accept': 'application/json'}\n", + " result = requests.get(url,params=params,headers=headers)\n", + " if result.status_code==200:\n", + " response = result.json()\n", + " substances = Substances.model_construct(**response)\n", + " for substance in substances.substance:\n", + " url_study = \"{}/study?max=10000\".format(substance.URI)\n", + " study = requests.get(url_study,headers=headers)\n", + " if study.status_code==200:\n", + " response_study = study.json()\n", + " substance.study = Study.model_construct(**response_study).study\n", + " #break\n", + "\n", + " return substances\n", + "\n", + "def write_studies_nexus(substances, single_file=True):\n", + " if single_file:\n", + " nxroot = nx.NXroot()\n", + " substances.to_nexus(nxroot)\n", + " file = os.path.join(tempfile.gettempdir(), \"remote.nxs\")\n", + " print(file)\n", + " nxroot.save(file, mode=\"w\")\n", + " else: \n", + " for substance in substances.substance:\n", + " for study in substance.study:\n", + " file = os.path.join(tempfile.gettempdir(), \"study_{}.nxs\".format(study.uuid))\n", + " print(file)\n", + " nxroot = nx.NXroot()\n", + " try:\n", + " study.to_nexus(nxroot)\n", + " nxroot.save(file, mode=\"w\")\n", + " except Exception as err:\n", + " #print(\"error\",file,str(err))\n", + " print(file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import traceback" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = \"https://apps.ideaconsult.net/gracious/substance/\"\n", + "#url = \"http://localhost:9090/ambit2/substance/\"\n", + "#url = \"http://localhost:9090/ambit2/substance/POLY-e02442cc-8f7c-3a71-82cf-7df5888a4bfa\"\n", + "#url = \"http://localhost:9090/ambit2/substance/POLY-25d13fa6-c18b-35c8-b0f6-7325f5f3e505\"\n", + "try:\n", + " substances = query(url=url,params = {\"max\" : 1}) \n", + " _json = substances.model_dump(exclude_none=True)\n", + " new_substances = Substances.model_construct(**_json)\n", + " #test roundtrip\n", + " assert substances == new_substances\n", + "\n", + " file = os.path.join(tempfile.gettempdir(), \"remote.json\")\n", + " 
print(file)\n", + " with open(file, 'w', encoding='utf-8') as file:\n", + " file.write(substances.model_dump_json(exclude_none=True))\n", + " \n", + " for s in substances.substance:\n", + " for pa in s.study:\n", + " effectarrays_only, df = pa.convert_effectrecords2array()\n", + " display(df.dropna(axis=1,how=\"all\"))\n", + " print(effectarrays_only)\n", + " #break\n", + " #write_studies_nexus(substances, single_file=False)\n", + "except Exception as x:\n", + " traceback.print_exc()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/examples/test.py b/examples/test.py deleted file mode 100644 index cc7945c..0000000 --- a/examples/test.py +++ /dev/null @@ -1,3 +0,0 @@ -import pyambit - -print("test") diff --git a/src/pyambit/datamodel.py b/src/pyambit/datamodel.py index d96d62d..b5ab3ae 100644 --- a/src/pyambit/datamodel.py +++ b/src/pyambit/datamodel.py @@ -20,7 +20,7 @@ model_validator, ) -from pyambit.ambit_deco import add_ambitmodel_method +from pyambit.ambit_deco import add_ambitmodel_method # noqa: F401 class AmbitModel(BaseModel): @@ -83,7 +83,14 @@ def model_construct(cls, **data: Any) -> "Protocol": return super().model_construct(**data) def __repr__(self): - return f"Protocol(topcategory={self.topcategory!r}, category={self.category!r}, endpoint={self.endpoint!r}, guideline={self.guideline!r})" + return ( + "Protocol(" + f"topcategory={self.topcategory!r}, " + f"category={self.category!r}, " + f"endpoint={self.endpoint!r}, " + f"guideline={self.guideline!r}" + ")" + ) def __eq__(self, other): if not isinstance(other, Protocol): @@ -139,15 +146,15 @@ def __repr__(self): EffectResult = create_model("EffectResult", __base__=EffectResult) -class ValueArray(AmbitModel): +class BaseValueArray(AmbitModel): unit: Optional[str] = None # the arrays can in fact contain strings, we don't need textValue! 
values: Union[npt.NDArray, None] = None errQualifier: Optional[str] = None errorValue: Optional[Union[npt.NDArray, None]] = None # but loValue - upValue need some support - # also loValue + textValue as used in composition data - auxiliary: Optional[Dict[str, npt.NDArray]] = None + # also loValue + textValue as used in composition / analytics data + # See ValueArray model_config = ConfigDict(arbitrary_types_allowed=True) @@ -158,14 +165,25 @@ def create( unit: str = None, errorValue: npt.NDArray = None, errQualifier: str = None, - auxiliary: Dict[str, npt.NDArray] = None, ): return cls( - values=values, - unit=unit, - errorValue=errorValue, - errQualifier=errQualifier, - auxiliary=auxiliary, + values=values, unit=unit, errorValue=errorValue, errQualifier=errQualifier + ) + + @classmethod + def model_construct(cls, **data): + def deserialize(value): + if isinstance(value, list): + return np.array(value) # Convert lists back to numpy arrays + return value + + values = deserialize(data.get("values")) + unit = data.get("unit") + errQualifier = data.get("errQualifier") + errorValue = deserialize(data.get("errorValue")) + + return cls( + values=values, unit=unit, errQualifier=errQualifier, errorValue=errorValue ) def model_dump_json(self, **kwargs) -> str: @@ -179,27 +197,154 @@ def serialize(obj): return json.dumps(model_dict, default=serialize, **kwargs) def __eq__(self, other): - def compare_auxiliary(aux1, aux2): - if aux1 is aux2: - return True - if aux1 is None or aux2 is None: - return False - if aux1.keys() != aux2.keys(): - return False - return all(np.array_equal(aux1[k], aux2[k]) for k in aux1) - - if not isinstance(other, ValueArray): + if not isinstance(other, BaseValueArray): return False return ( self.unit == other.unit and self.errQualifier == other.errQualifier and np.array_equal(self.values, other.values) - and compare_auxiliary(self.auxiliary, other.auxiliary) and np.array_equal(self.errorValue, other.errorValue) ) +class MetaValueArray(BaseValueArray): + conditions: Optional[Dict[str, str]] = None + model_config = ConfigDict(arbitrary_types_allowed=True) + + @classmethod + def create( + cls, + values: npt.NDArray = None, + unit: str = None, + errorValue: npt.NDArray = None, + errQualifier: str = None, + conditions: Optional[Dict[str, str]] = None, + ): + return cls( + values=values, + unit=unit, + errorValue=errorValue, + errQualifier=errQualifier, + conditions=conditions, + ) + + @classmethod + def model_construct(cls, **data): + base_instance = super().model_construct(**data) + conditions = data.get("conditions", None) + return cls( + values=base_instance.values, + unit=base_instance.unit, + errorValue=base_instance.errorValue, + errQualifier=base_instance.errQualifier, + conditions=conditions, + ) + + def model_dump_json(self, **kwargs) -> str: + def serialize(obj): + if isinstance(obj, np.ndarray): + return obj.tolist() # Convert NumPy arrays to lists + raise TypeError(f"Type {type(obj).__name__} not serializable") + + model_dict = self.model_dump() + return json.dumps(model_dict, default=serialize, **kwargs) + + def __eq__(self, other): + if not isinstance(other, MetaValueArray): + return False + return super().__eq__(other) and self.conditions == other.conditions + + +class ValueArray(MetaValueArray): + auxiliary: Optional[Dict[str, Union[npt.NDArray, "MetaValueArray"]]] = None + model_config = ConfigDict(arbitrary_types_allowed=True) + + @classmethod + def create( + cls, + values: npt.NDArray = None, + unit: str = None, + errorValue: npt.NDArray = None, + errQualifier: 
str = None, + conditions: Optional[Dict[str, str]] = None, + auxiliary: Dict[str, Union[npt.NDArray, "MetaValueArray"]] = None, + ): + return cls( + values=values, + unit=unit, + errorValue=errorValue, + errQualifier=errQualifier, + conditions=conditions, + auxiliary=auxiliary, + ) + + @classmethod + def model_construct(cls, **data): + def deserialize(value): + if isinstance(value, list): + return np.array(value) # Convert lists back to numpy arrays + return value + + base_data = {k: deserialize(v) for k, v in data.items() if k != "auxiliary"} + base_instance = MetaValueArray.model_construct(**base_data) + auxiliary_data = data.get("auxiliary", {}) + + if auxiliary_data is not None: + auxiliary = {} + for key, value in auxiliary_data.items(): + if isinstance( + value, dict + ): # Check if it's a dictionary representing a MetaValueArray + auxiliary[key] = MetaValueArray.model_construct(**value) + else: + auxiliary[key] = deserialize(value) + else: + auxiliary = None + + return cls( + values=base_instance.values, + unit=base_instance.unit, + errQualifier=base_instance.errQualifier, + errorValue=base_instance.errorValue, + conditions=base_instance.conditions, + auxiliary=auxiliary, + ) + + def model_dump(self): + base_dict = super().model_dump() + return {**base_dict, "auxiliary": self.auxiliary} + + def __eq__(self, other): + if not isinstance(other, ValueArray): + return False + return super().__eq__(other) and self.compare_auxiliary( + self.auxiliary, other.auxiliary + ) + + @staticmethod + def compare_auxiliary(aux1, aux2): + if aux1 is aux2: + return True + if aux1 is None or aux2 is None: + return False + if aux1.keys() != aux2.keys(): + return False + return all(np.array_equal(aux1[k], aux2[k]) for k in aux1) + + def model_dump_json(self, **kwargs) -> str: + def serialize(obj): + if isinstance(obj, np.ndarray): + return obj.tolist() # Convert NumPy arrays to lists + if isinstance(obj, MetaValueArray): + return obj.model_dump() # Serialize BaseValueArray to a dictionary + raise TypeError(f"Type {type(obj).__name__} not serializable") + + model_dict = self.model_dump() + return json.dumps(model_dict, default=serialize, **kwargs) + + class EffectRecord(AmbitModel): + nx_name: Optional[str] = None endpoint: str endpointtype: Optional[str] = None result: EffectResult = None @@ -329,10 +474,16 @@ def __eq__(self, other): def __repr__(self): return ( - f"EffectRecord(endpoint={self.endpoint!r}, endpointtype={self.endpointtype!r}, " - f"result={self.result!r}, conditions={self.conditions!r}, " - f"idresult={self.idresult!r}, endpointGroup={self.endpointGroup!r}, " - f"endpointSynonyms={self.endpointSynonyms!r}, sampleID={self.sampleID!r})" + "EffectRecord(" + f"endpoint={self.endpoint!r}, " + f"endpointtype={self.endpointtype!r}, " + f"result={self.result!r}, " + f"conditions={self.conditions!r}, " + f"idresult={self.idresult!r}, " + f"endpointGroup={self.endpointGroup!r}, " + f"endpointSynonyms={self.endpointSynonyms!r}, " + f"sampleID={self.sampleID!r}" + ")" ) @@ -388,20 +539,23 @@ def model_construct(cls, **data: Any) -> "EffectArray": isinstance(a, str) for a in alternatives ): raise ValueError( - f"Alternative axes for '{primary_axis}' should be a list of strings." + f"Alternative axes for '{primary_axis}' should be a list of " + "strings." ) # Ensure all alternative axes are present in 'axes' if primary_axis not in data["axes"]: raise ValueError( - f"Primary axis '{primary_axis}' in axis_groups must be a key in axes." 
+ f"Primary axis '{primary_axis}' in axis_groups must be a key " + "in axes." ) # Validate that each alternative axis exists in 'axes' for alt_axis in alternatives: if alt_axis not in data["axes"]: raise ValueError( - f"Alternative axis '{alt_axis}' in axis_groups must be a key in axes." + f"Alternative axis '{alt_axis}' in axis_groups must be a " + "key in axes." ) new_axis_groups[primary_axis] = alternatives @@ -421,12 +575,19 @@ def __eq__(self, other): ) def __repr__(self): + repr_endpointtype = repr(self.endpointtype) if self.endpointtype else "" repr_signal = repr(self.signal) if self.signal else "None" repr_axes = repr(self.axes) if self.axes else "None" repr_axis_groups = repr(self.axis_groups) if self.axis_groups else "None" return ( - f"EffectArray(signal={repr_signal}, axes={repr_axes}, " - f"axis_groups={repr_axis_groups}, {super().__repr__()})" + "EffectArray(" + f"endpoint={self.endpoint}, " + f"endpointtype={repr_endpointtype}, " + f"signal={repr_signal}, " + f"axes={repr_axes}, " + f"axis_groups={repr_axis_groups}, " + f"{super().__repr__()}" + ")" ) @@ -463,9 +624,13 @@ def __eq__(self, other): def __repr__(self): return ( - f"ProtocolEffectRecord(protocol={self.protocol}, documentUUID={self.documentUUID}, " - f"studyResultType={self.studyResultType}, interpretationResult={self.interpretationResult}, " - f"{super().__repr__()})" + "ProtocolEffectRecord(" + f"protocol={self.protocol}, " + f"documentUUID={self.documentUUID}, " + f"studyResultType={self.studyResultType}, " + f"interpretationResult={self.interpretationResult}, " + f"{super().__repr__()}" + ")" ) @@ -505,12 +670,14 @@ def __eq__(self, other): def __repr__(self): return ( - f"ReliabilityParams(r_isRobustStudy={self.r_isRobustStudy}, " + "ReliabilityParams(" + f"r_isRobustStudy={self.r_isRobustStudy}, " f"r_isUsedforClassification={self.r_isUsedforClassification}, " f"r_isUsedforMSDS={self.r_isUsedforMSDS}, " f"r_purposeFlag={self.r_purposeFlag}, " f"r_studyResultType={self.r_studyResultType}, " - f"r_value={self.r_value})" + f"r_value={self.r_value}" + ")" ) @@ -533,7 +700,13 @@ def __eq__(self, other): ) def __repr__(self): - return f"Citation(year={self.year}, title={self.title}, " f"owner={self.owner})" + return ( + "Citation(" + f"year={self.year}, " + f"title={self.title}, " + f"owner={self.owner}" + ")" + ) Citation = create_model("Citation", __base__=Citation) @@ -625,6 +798,7 @@ class ProtocolApplication(AmbitModel): """ uuid: Optional[str] = None + nx_name: Optional[str] = None # reliability: Optional[ReliabilityParams] interpretationResult: Optional[str] = None interpretationCriteria: Optional[str] = None @@ -725,13 +899,19 @@ def __eq__(self, other): def __repr__(self): return ( - f"ProtocolApplication(uuid={self.uuid!r}, " + "ProtocolApplication(" + f"uuid={self.uuid!r}, " f"interpretationResult={self.interpretationResult!r}, " f"interpretationCriteria={self.interpretationCriteria!r}, " - f"parameters={self.parameters!r}, citation={self.citation!r}, " - f"effects={self.effects!r}, owner={self.owner!r}, " - f"protocol={self.protocol!r}, investigation_uuid={self.investigation_uuid!r}, " - f"assay_uuid={self.assay_uuid!r}, updated={self.updated!r})" + f"parameters={self.parameters!r}, " + f"citation={self.citation!r}, " + f"effects={self.effects!r}, " + f"owner={self.owner!r}, " + f"protocol={self.protocol!r}, " + f"investigation_uuid={self.investigation_uuid!r}, " + f"assay_uuid={self.assay_uuid!r}, " + f"updated={self.updated!r}" + ")" ) def create_multidimensional_matrix( @@ -779,13 +959,22 @@ def 
create_multidimensional_matrix( # Determine the shape of the multidimensional matrix shape = tuple(len(values) for values in axis_values) # Initialize the multidimensional matrix with NaNs - matrix = np.full(shape, "" if signal_col == "textValue" else np.nan) + if signal_col == "textValue": + matrix = np.full(shape, "") + else: + matrix = np.full(shape, np.nan) matrix_errors = None if errors_col is None else np.full(shape, np.nan) auxsignals = {} if auxsignal_cols: for a in auxsignal_cols: - auxsignals[a] = np.full(shape, "" if a == "textValue" else np.nan) + if a == "textValue": + _arr = np.empty(shape, dtype=object) + if len(shape) > 0: + _arr[:] = "" + auxsignals[a] = _arr + else: + auxsignals[a] = np.full(shape, np.nan) # Populate the matrix with signal values for _, row in df.iterrows(): @@ -804,10 +993,14 @@ def create_multidimensional_matrix( if auxsignal_cols: for a in auxsignal_cols: if not pd.isna(row[a]): - auxsignals[a][indices] = row[a] - except Exception as x: - print("matrix", self.uuid) - print(row) + if isinstance(row[a], bytes): + auxsignals[a][indices] = row[a].decode("utf-8") + else: + auxsignals[a][indices] = row[a] + except: # noqa: B001,E722 FIXME + # print("matrix", self.uuid) + # print(row) + print(axis_indices) print(primary_axis_cols) print(traceback.format_exc()) @@ -815,9 +1008,10 @@ def create_multidimensional_matrix( unique_values = sorted(df[axis].unique()) axes[axis].values = unique_values - # Collect alternative axis values - tbd - sorting may change order of alternative axes! + # Collect alternative axis values - tbd - sorting may change order of + # alternative axes! if alt_axes is not None: - for primary_axis, alt_cols in alt_axes.items(): + for _primary_axis, alt_cols in alt_axes.items(): for alt_col in alt_cols: if alt_col in df.columns: _tmp = sorted(df[alt_col].unique()) @@ -843,8 +1037,10 @@ def convert_effectrecords2array(self): if len(_nonnumcols) > 0: df_set = split_df_by_columns(_df, _nonnumcols) # debug + # here the null columns (e.g. 
replicates) are lost + # print(df_set) - for key, df in df_set.items(): + for _key, df in df_set.items(): # df.to_excel("{}_{}.xlsx".format(self.uuid,key),index=False) for endpointtype in df["endpointtype"].unique(): @@ -901,7 +1097,9 @@ def convert_effectrecords2array(self): try: _f["loValue"] = _f["loValue"].fillna(_tmp[_col]) except Exception as x: - # print(_f['loValue'].apply(type).value_counts()) + # print( + # _f['loValue'].apply(type).value_counts() + # ) print(x) print(_col, _f["loValue"], self.uuid) @@ -922,20 +1120,20 @@ def convert_effectrecords2array(self): if _tmp["loValue"].dropna().empty else transform_array(_tmp["loValue"].values) ) - loQualifier = ( - None - if _tmp["loQualifier"].dropna().empty - else transform_array(_tmp["loQualifier"].values) - ) - upQualifier = ( - None - if _tmp["upQualifier"].dropna().empty - else transform_array(_tmp["upQualifier"].values) - ) - - errqualifier = _tmp["errQualifier"].unique()[ - 0 - ] # if _tmp["errQualifier"].nunique() == 1 else _tmp["errQualifier"] + # _loQualifier = ( + # None + # if _tmp["loQualifier"].dropna().empty + # else transform_array(_tmp["loQualifier"].values) + # ) + # _upQualifier = ( + # None + # if _tmp["upQualifier"].dropna().empty + # else transform_array(_tmp["upQualifier"].values) + # ) + + errqualifier = _tmp["errQualifier"].unique()[0] + # if _tmp["errQualifier"].nunique() == 1 + # else _tmp["errQualifier"] # df_axes["loValue"] = loValues auxsignal_cols = [] @@ -953,40 +1151,70 @@ def convert_effectrecords2array(self): auxsignal_cols.append(tag) df_axes[tag] = _values - if _tmp["errorValue"].dropna().empty: - error_col = None + if df_axes.isna().any().any(): + # for some reason there are still nan values + axes_all = [] + nan_columns = df_axes.columns[df_axes.isna().any()].tolist() + df_axes_nan = df_axes[ + df_axes[nan_columns].isna().any(axis=1) + ] + df_axes_nan = df_axes_nan.dropna(axis=1, how="all") + df_axes_not_nan = df_axes[ + df_axes[nan_columns].notna().all(axis=1) + ] + if not df_axes_not_nan.empty: + axes_all.append(df_axes_not_nan) + # print(print(df_axes_not_nan)) + if not df_axes_nan.empty: + # ignore for now + # axes_all.append(df_axes_nan) + print(df_axes_nan) else: - error_col = "errorValue" - df_axes[error_col] = _tmp[error_col] - - matrix, axes, matrix_errors, auxsignals = ( - self.create_multidimensional_matrix( - df_axes, - signal_col, - axes, - alt_axes, - error_col, - auxsignal_cols, + axes_all = [df_axes] + + for df_axes in axes_all: + if _tmp["errorValue"].dropna().empty: + error_col = None + else: + error_col = "errorValue" + df_axes[error_col] = _tmp[error_col] + + matrix, axes, matrix_errors, auxsignals = ( + self.create_multidimensional_matrix( + df_axes, + signal_col, + axes, + alt_axes, + error_col, + auxsignal_cols, + ) ) - ) - - earray = EffectArray( - endpoint=endpoint, - endpointtype=endpointtype, - conditions=new_conditions, - signal=ValueArray( - unit=unit, - # values=textValue if loValues is None else loValues, - values=matrix, - errQualifier=errqualifier, - errorValue=matrix_errors, - auxiliary=auxsignals, - ), - axes=axes, - axis_groups=alt_axes, - ) - arrays.append(earray) - # print(earray) + # Remove items where the value is None or NaN + new_conditions = { + k: v + for k, v in new_conditions.items() + if v is not None + and not (isinstance(v, float) and np.isnan(v)) + } + + earray = EffectArray( + endpoint=endpoint, + endpointtype=endpointtype, + conditions=new_conditions, + signal=ValueArray( + unit=unit, + # values=textValue if loValues is None + # else loValues, + 
values=matrix, + errQualifier=errqualifier, + errorValue=matrix_errors, + auxiliary=auxsignals, + ), + axes=axes, + axis_groups=alt_axes, + ) + arrays.append(earray) + # print(earray) return arrays, _df @@ -1008,7 +1236,7 @@ class Study(AmbitModel): papps = Study(**parsed_json) for papp in papps: print(papp) - """ + """ # noqa: B950 study: List[ProtocolApplication] @@ -1123,9 +1351,17 @@ def __eq__(self, other: Any) -> bool: def __repr__(self) -> str: return ( - f"Compound(URI={self.URI}, structype={self.structype}, metric={self.metric}, " - f"name={self.name}, cas={self.cas}, einecs={self.einecs}, " - f"inchikey={self.inchikey}, inchi={self.inchi}, formula={self.formula})" + "Compound(" + f"URI={self.URI}, " + f"structype={self.structype}, " + f"metric={self.metric}, " + f"name={self.name}, " + f"cas={self.cas}, " + f"einecs={self.einecs}, " + f"inchikey={self.inchikey}, " + f"inchi={self.inchi}, " + f"formula={self.formula}" + ")" ) @@ -1316,10 +1552,19 @@ def __eq__(self, other): def __repr__(self): return ( - f"SubstanceRecord(URI={self.URI}, ownerUUID={self.ownerUUID}, ownerName={self.ownerName}, " - f"i5uuid={self.i5uuid}, name={self.name}, publicname={self.publicname}, format={self.format}, " - f"substanceType={self.substanceType}, referenceSubstance={self.referenceSubstance}, " - f"study={self.study}, composition={self.composition})" + "SubstanceRecord(" + f"URI={self.URI}, " + f"ownerUUID={self.ownerUUID}, " + f"ownerName={self.ownerName}, " + f"i5uuid={self.i5uuid}, " + f"name={self.name}, " + f"publicname={self.publicname}, " + f"format={self.format}, " + f"substanceType={self.substanceType}, " + f"referenceSubstance={self.referenceSubstance}, " + f"study={self.study}, " + f"composition={self.composition}" + ")" ) @@ -1410,7 +1655,7 @@ def transform_array(arr): if any_strings: try: return pd.to_numeric(arr, errors="raise") - except Exception as e: + except Exception: _converted = np.array( [ ( @@ -1487,18 +1732,19 @@ def is_string_only(series): # Check if all values in the series are either strings or NaN return series.apply(lambda x: isinstance(x, str) or pd.isna(x)).all() - # Use list comprehension to check if each column is string only and cannot be converted to numeric + # Use list comprehension to check if each column is string only and cannot be + # converted to numeric. 
string_only_cols = [ col for col in object_cols if is_string_only(df[col]) and pd.to_numeric(df[col], errors="coerce").isna().all() ] - + # print(string_only_cols) return string_only_cols -def split_df_by_columns(df, columns): +def split_df_by_columns_bad_with_nans(df, columns): # Create a dictionary to hold the split DataFrames split_dfs = {} @@ -1517,3 +1763,29 @@ def split_df_by_columns(df, columns): split_dfs[key] = split_df return split_dfs + + +def split_df_by_columns(df, columns): + # Create a dictionary to hold the split DataFrames + split_dfs = {} + + # Identify unique combinations of values for the specified columns + unique_combinations = df[columns].drop_duplicates() + + for _, row in unique_combinations.iterrows(): + # Create a filter condition that treats NaN as equal + filter_condition = pd.DataFrame( + { + col: (df[col] == row[col]) | (pd.isna(df[col]) & pd.isna(row[col])) + for col in columns + } + ).all(axis=1) + + # Create a new DataFrame for this combination + split_df = df[filter_condition] + + # Use a tuple of the unique values as the key, treating NaN gracefully + key = tuple(row) + split_dfs[key] = split_df + + return split_dfs diff --git a/src/pyambit/nexus_parser.py b/src/pyambit/nexus_parser.py index 1af541e..2a006a0 100644 --- a/src/pyambit/nexus_parser.py +++ b/src/pyambit/nexus_parser.py @@ -1,113 +1,211 @@ -import h5py -import ramanchada2 as rc2 - - -class NexusParser: - def __init__(self): - self.parsed_objects = {} - - def parse_data(self, entry, default=False, nxprocess=False): - for attr in entry.attrs: - print(attr, entry.attrs.get(attr)) - for _, item in entry.items(): - nx_class = item.attrs.get("NX_class", None) - print("PROCESSED " if nxprocess else "", "DATA ", item.name, " ", nx_class) - - def parse_entry(self, entry, nxprocess=False, dataparser=None): - print(dataparser) - nx_class = entry.attrs.get("NX_class", None) - default = entry.attrs.get("default", None) - # print(entry.name, ' ', nx_class, default) - for _, item in entry.items(): - nx_class = item.attrs.get("NX_class", None) - if nx_class == "NXdata": - if dataparser is None: - self.parse_data(item, entry.name == default, nxprocess) - else: - print("dataparsre", dataparser) - dataparser(item, entry.name == default, nxprocess) - - elif nx_class == "NXenvironment": - pass - elif nx_class == "NXinstrument": - pass - elif nx_class == "NXcite": - pass - elif nx_class == "NXcollection": - pass - elif nx_class == "NXnote": - pass - elif nx_class == "NXsample": - self.parse_sample(item) - else: - print("ENTRY ", item.name, " ", nx_class) +import traceback +from typing import Dict - def parse_sample(self, group): - nx_class = group.attrs.get("NX_class", None) - if nx_class == "NXsample_component": - pass - else: - print(group.name, " ", nx_class) +import nexusformat.nexus as nx - def parse(self, file_path: str, dataparser=None): - with h5py.File(file_path, "r") as file: - self.parse_h5(file, dataparser) +from pyambit.datamodel import ( + Citation, + EffectRecord, + EffectResult, + EndpointCategory, + Protocol, + ProtocolApplication, + SampleLink, + SubstanceRecord, + Substances, + Value, +) - def parse_h5(self, h5_file, dataparser=None): - try: - def iterate_groups(group, indent="", nxprocess=False): - nx_class = group.attrs.get("NX_class", None) - if nx_class == "NXentry" or nx_class == "NXsubentry": - self.parse_entry(group, nxprocess, dataparser) - elif nx_class == "NXsample": - self.parse_sample(group) - - else: - for name, item in group.items(): - nx_class = item.attrs.get("NX_class", None) - 
if isinstance(item, h5py.Group):
-                    # print(indent + 'Group:', name, ' ', nx_class)
-                    # Recursively call the function for nested groups
-                    iterate_groups(
-                        item,
-                        indent + "  ",
-                        nxprocess or nx_class == "NX_process",
-                    )
-                else:
-                    print(indent + "Dataset:", name, " ", nx_class)
-
-            # Start the iteration from the root of the file
-            iterate_groups(h5_file)
-        except Exception as err:
-            print(err)
+class Nexus2Ambit:
+
+    def __init__(self, domain: str, index_only: bool):
+        self.substances: Dict[str, SubstanceRecord] = {}
+        self.domain = domain
+        self.index_only = index_only
+
+    def __enter__(self):
+        self.clear()
+        return self

-class SpectrumParser(NexusParser):
-    def __init__(self):
-        super().__init__()
-        # Replace the parent class field with the spectrum-specific field
-        self.parsed_objects = {}
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Any cleanup code, if needed
+        pass
+
+    def clear(self):
+        self.substances = {}
+
+    def substance_from_nexus(self, nxentry: nx.NXentry) -> SubstanceRecord:
+        try:
+            record = SubstanceRecord(
+                URI=None,
+                ownerUUID=nxentry.attrs["owner-uuid"],
+                ownerName=nxentry.attrs["ownerName"],
+                i5uuid=nxentry.attrs["uuid"],
+                name=nxentry["name"].nxdata,
+                publicname=nxentry.attrs["publicname"],
+                format="NeXus",
+                substanceType="CHEBI_59999",
+                referenceSubstance=None,
+                study=[],
+                composition=None,
+            )
+            return record
+        except Exception as err:
+            print(traceback.format_exc())
+            raise err
+
+    def parse_substances(self, nxentry: nx.NXentry):
+        for _entry_name, entry in nxentry.items():
+            if isinstance(entry, nx.NXsample):
+                record: SubstanceRecord = self.substance_from_nexus(entry)
+                if record.i5uuid not in self.substances:
+                    self.substances[record.i5uuid] = record
+
+    def parse_studies(self, nxroot: nx.NXroot, relative_path: str):
+        for entry_name, entry in nxroot.items():
+            if entry_name != "substance":
+                papp: ProtocolApplication = self.parse_entry(entry, relative_path)
+                if papp.owner.substance.uuid in self.substances:
+                    self.substances[papp.owner.substance.uuid].study.append(papp)
+
+    def parse(self, nxroot: nx.NXroot, relative_path: str):
+        for entry_name, entry in nxroot.items():
+            if entry_name == "substance":
+                self.parse_substances(entry)
+        self.parse_studies(nxroot, relative_path)
+
+    def get_substances(self):
+        return Substances(substance=self.substances.values())
+
+    def parse_entry(
+        self, nxentry: nx.NXentry, relative_path: str
+    ) -> ProtocolApplication:
+        dox = nxentry.get("experiment_documentation", None)
+        protocol = None
+        parameters = {}
+        if dox is not None:
+            _protocol = dox.get("protocol", None)
+            if _protocol is None:
+                pass
+            else:
+                protocol = Protocol(
+                    topcategory=_protocol.attrs["topcategory"],
+                    category=EndpointCategory(code=_protocol.attrs["code"]),
+                    endpoint=(
+                        _protocol.attrs["endpoint"]
+                        if "endpoint" in _protocol.attrs
+                        else None
+                    ),
+                    guideline=[_protocol.attrs["guideline"]],
+                )
+        if protocol is None:
+            if nxentry["definition"].nxvalue == "NXraman":
+                protocol = Protocol(
+                    "P-CHEM", "ANALYTICAL_METHODS_SECTION", "", ["Raman spectroscopy"]
+                )
+                parameters["E.method"] = nxentry["definition"].nxvalue
+            else:
+                protocol = Protocol("P-CHEM", "UNKNOWN", "", ["UNKNOWN"])

-    def parse_data(self, entry, default=False, nxprocess=False):
+        _reference = nxentry.get("reference")
+        citation = Citation(
+            year=_reference["year"].nxdata,
+            title=_reference["title"].nxdata,
+            owner=_reference["owner"].nxdata,
+        )

-        signal = entry.attrs.get("signal", None)
-        # interpretation = 
entry.attrs.get("interpretation", None) - axes = entry.attrs.get("axes", None) - # print(default,signal,interpretation,axes,isinstance(entry[signal], h5py.Dataset)) - y = entry[signal][:] - for axis in axes: - x = entry[axis][:] - break - spe = rc2.spectrum.Spectrum(x=x, y=y) - self.parsed_objects[str(entry)] = spe + try: + wl = nxentry["instrument/beam_incident/wavelength"].nxdata + wl_unit = nxentry["instrument/beam_incident/wavelength"].attrs["unit"] + parameters["wavelength"] = Value(loValue=wl, unit=wl_unit) + except: # noqa: B001,E722 FIXME + parameters["wavelength"] = None + try: + instrument_model = nxentry["instrument/device_information/model"].nxvalue + instrument_vendor = nxentry["instrument/device_information/vendor"].nxvalue + parameters["instrument"] = "{} {}".format( + instrument_vendor, instrument_model + ) + except: # noqa: B001,E722 FIXME + pass -# spectrum_parser = SpectrumParser() -# spectrum_parser.parse(file_path) + try: + parameters["E.method"] = nxentry[ + "experiment_documentation/E.method" + ].nxvalue + except Exception: + parameters["E.method"] = nxentry["definition"].nxvalue -# Access the spectrum data -# for key in spectrum_parser.parsed_objects: -# spe = spectrum_parser.parsed_objects[key] -# print("Spectrum data", key, spe) -# spe.plot() + # the sample + try: + _owner = SampleLink.create( + sample_uuid=nxentry["sample/substance"].attrs["uuid"], + sample_provider=nxentry["sample/provider"].nxdata, + ) + except Exception as err: + raise ValueError(err) + + papp: ProtocolApplication = ProtocolApplication( + uuid=nxentry.get("entry_identifier_uuid").nxvalue, + interpretationResult=None, + interpretationCriteria=None, + parameters=parameters, + citation=citation, + effects=[], + owner=_owner, + protocol=protocol, + investigation_uuid=nxentry.get("collection_identifier").nxvalue, + assay_uuid=nxentry.get("experiment_identifier").nxvalue, + updated=None, + ) + for endpointtype_name, enddpointtype_group in nxentry.items(): + + if isinstance(enddpointtype_group, nx.NXsample): + continue + elif isinstance(enddpointtype_group, nx.NXcite): + continue + elif isinstance(enddpointtype_group, nx.NXinstrument): + continue + elif isinstance(enddpointtype_group, nx.NXcollection): + continue + elif isinstance(enddpointtype_group, nx.NXenvironment): + continue + elif isinstance(enddpointtype_group, nx.NXnote): + continue + elif isinstance(enddpointtype_group, nx.NXgroup): + pass + elif isinstance(enddpointtype_group, nx.NXprocess): + pass + else: + continue + for _name_data, data in enddpointtype_group.items(): + if isinstance(data, nx.NXdata): + if self.index_only: + papp.effects.append( + self.parse_effect(endpointtype_name, data, relative_path) + ) + else: + raise NotImplementedError("Not implemented") + + return papp + + def parse_effect( + self, endpointtype_name, data: nx.NXentry, relative_path: str + ) -> EffectRecord: + if self.index_only: + return EffectRecord( + endpoint=data.attrs["signal"], + endpointtype=endpointtype_name, + result=EffectResult( + textValue="{}/{}#{}".format(self.domain, relative_path, data.nxpath) + ), + conditions={}, + idresult=None, + endpointGroup=None, + endpointSynonyms=[], + sampleID=None, + ) + else: + raise NotImplementedError("Not implemented") diff --git a/src/pyambit/nexus_spectra.py b/src/pyambit/nexus_spectra.py index 7ec70d7..1315ea2 100644 --- a/src/pyambit/nexus_spectra.py +++ b/src/pyambit/nexus_spectra.py @@ -1,13 +1,14 @@ import uuid from datetime import datetime -from typing import Dict, List, Union +from typing import Dict 
import nexusformat.nexus.tree as nx
 import numpy as np
 import numpy.typing as npt

 import pyambit.datamodel as mx
-from pyambit.nexus_writer import to_nexus
+
+from pyambit.nexus_writer import to_nexus  # noqa: F401


 def spe2effect(
@@ -17,9 +18,17 @@ def spe2effect(
     endpointtype="RAW_DATA",
     meta: Dict = None,
 ):
-    data_dict: Dict[str, mx.ValueArray] = {"x": mx.ValueArray(values=x, unit=unit)}
+    try:
+        signal = meta["@signal"]
+    except (KeyError, TypeError):  # TypeError covers the meta=None default
+        signal = "y"
+    try:
+        axes = meta["@axes"]
+    except (KeyError, TypeError):
+        axes = ["x"]  # default axis name for the x data
+    data_dict: Dict[str, mx.ValueArray] = {axes[0]: mx.ValueArray(values=x, unit=unit)}
     return mx.EffectArray(
-        endpoint="y",
+        endpoint=signal,
         endpointtype=endpointtype,
         signal=mx.ValueArray(values=y, unit="count"),
         axes=data_dict,
@@ -28,15 +37,15 @@ def configure_papp(
     papp: mx.ProtocolApplication = None,
-    instrument=None,
+    instrument=("vendor", "model"),
     wavelength=None,
-    provider="FNMT",
+    provider="ABCD",
     sample="PST",
-    sample_provider="CHARISMA",
-    investigation="Round Robin 1",
+    sample_provider="TEST",
+    investigation="My investigation",
     citation: mx.Citation = None,
-    prefix="CRMA",
-    meta=None,
+    prefix="TEST",
+    meta: Dict = None,
 ):
     if papp is None:
         papp = mx.ProtocolApplication(
@@ -57,10 +66,16 @@
         uuid.uuid5(uuid.NAMESPACE_OID, "{} {}".format(investigation, provider))
     )
     papp.parameters = {
-        "E.method": "Raman spectrometry",
-        "wavelength": wavelength,
-        "T.instrument_model": instrument,
+        "/experiment_documentation/E.method": "Raman spectroscopy",
+        "/experiment_type": "Raman spectroscopy",
+        "instrument/beam_incident/wavelength": mx.Value(loValue=wavelength, unit="nm"),
+        "instrument/device_information/vendor": instrument[0],
+        "instrument/device_information/model": instrument[1],
+        "/definition": "NXraman",
     }
+    for key in list((meta or {}).keys()):  # tolerate the meta=None default
+        if not key.startswith("@"):
+            papp.parameters["/parameters/{}".format(key)] = meta[key]
     papp.uuid = "{}-{}".format(
         prefix,
diff --git a/src/pyambit/nexus_writer.py b/src/pyambit/nexus_writer.py
index 88f7da3..78428c4 100644
--- a/src/pyambit/nexus_writer.py
+++ b/src/pyambit/nexus_writer.py
@@ -1,28 +1,78 @@
 import math
-import numbers
 import re
 import traceback
 from typing import Dict, List

 import nexusformat.nexus as nx
 import numpy as np
-import pandas as pd

-from pyambit.ambit_deco import add_ambitmodel_method
+from h5py import string_dtype

-# from pydantic import validate_arguments
+from pyambit.ambit_deco import add_ambitmodel_method
 from pyambit.datamodel import (
     Composition,
     EffectArray,
-    effects2df,
+    MetaValueArray,
     ProtocolApplication,
     Study,
     SubstanceRecord,
     Substances,
     Value,
+    ValueArray,
 )

+# tbd parameterize
+
+
+def param_lookup(prm, value):
+    target = ["environment"]
+    _prmlo = prm.lower()
+    if "instrument" in _prmlo:
+        target = ["instrument"]
+    elif "technique" in _prmlo:
+        target = ["instrument"]
+    elif "wavelength" in _prmlo:
+        target = ["instrument", "beam_incident"]
+    elif "sample" in _prmlo:
+        target = ["sample"]
+    elif "material" in _prmlo:
+        target = ["sample"]
+    elif "dispers" in _prmlo:
+        target = ["sample"]
+    elif "vortex" in _prmlo:
+        target = ["sample"]
+    elif "stirr" in _prmlo:
+        target = ["sample"]
+    elif ("ASSAY" == prm.upper()) or ("E.METHOD" == prm.upper()):
+        target = ["experiment_documentation"]
+    elif "E.SOP_REFERENCE" == prm:
+        target = ["experiment_documentation"]
+    elif "OPERATOR" == prm:
+        target = ["experiment_documentation"]
+    elif prm.startswith("T."):
+        target = ["instrument"]
+    elif prm.startswith("E."):
+        target = 
["environment"] + elif "medium" in _prmlo: + target = ["environment"] + elif "cell" in _prmlo: + target = ["environment"] + elif "well" in _prmlo: + target = ["environment"] + elif "animal" in _prmlo: + target = ["environment"] + elif "EXPERIMENT_END_DATE" == prm: + target = ["end_time"] + elif "EXPERIMENT_START_DATE" == prm: + target = ["start_time"] + elif "__input_file" == prm: + target = ["experiment_documentation"] + else: + target = ["parameters"] + target.append(prm) + return target + @add_ambitmodel_method(ProtocolApplication) def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=False): @@ -56,9 +106,9 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal try: _categories_collection = "" if hierarchy: - if not papp.protocol.topcategory in nx_root: + if papp.protocol.topcategory not in nx_root: nx_root[papp.protocol.topcategory] = nx.NXgroup() - if not papp.protocol.category.code in nx_root[papp.protocol.topcategory]: + if papp.protocol.category.code not in nx_root[papp.protocol.topcategory]: nx_root[papp.protocol.topcategory][ papp.protocol.category.code ] = nx.NXgroup() @@ -71,12 +121,21 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal if papp.citation.owner is None else papp.citation.owner.replace("/", "_").upper() ) - except BaseException: + except BaseException: # noqa: B036 FIXME provider = "@" - entry_id = "{}/entry_{}_{}".format(_categories_collection, provider, papp.uuid) - except Exception as err: + if papp.nx_name is None: + entry_id = "{}/{}_{}".format(_categories_collection, provider, papp.uuid) + else: + entry_id = "{}/{}_{}".format( + _categories_collection, + "entry" if papp.nx_name is None else papp.nx_name, + papp.uuid, + ) + except Exception: # print(err) - entry_id = "/entry_{}".format(papp.uuid) + entry_id = "/{}_{}".format( + "entry" if papp.nx_name is None else papp.nx_name, papp.uuid + ) _categories_collection = "{}{}".format(_categories_collection, entry_id) if entry_id not in nx_root: @@ -119,12 +178,25 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal experiment_documentation["date"] = papp.updated # category = nx.NXgroup() # experiment_documentation["category"] = category - experiment_documentation.attrs["topcategory"] = papp.protocol.topcategory - experiment_documentation.attrs["code"] = papp.protocol.category.code - experiment_documentation.attrs["term"] = papp.protocol.category.term - experiment_documentation.attrs["title"] = papp.protocol.category.title - experiment_documentation.attrs["endpoint"] = papp.protocol.endpoint - experiment_documentation.attrs["guideline"] = papp.protocol.guideline + experiment_documentation["protocol"] = nx.NXcollection() + experiment_documentation["protocol"].attrs[ + "topcategory" + ] = papp.protocol.topcategory + experiment_documentation["protocol"].attrs[ + "code" + ] = papp.protocol.category.code + experiment_documentation["protocol"].attrs[ + "term" + ] = papp.protocol.category.term + experiment_documentation["protocol"].attrs[ + "title" + ] = papp.protocol.category.title + experiment_documentation["protocol"].attrs[ + "endpoint" + ] = papp.protocol.endpoint + experiment_documentation["protocol"].attrs[ + "guideline" + ] = papp.protocol.guideline # definition is usually reference to the Nexus XML definition # ambit category codes and method serve similar role nx_root["{}/definition".format(entry_id)] = ( @@ -134,6 +206,7 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, 
hierarchy=Fal papp.protocol.guideline, ) ) + if papp.parameters is not None: for tag in ["E.method", "ASSAY"]: if tag in papp.parameters: @@ -156,13 +229,15 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal nxmap.attrs["PROTOCOL_APPLICATION_UUID"] = "{}/entry_identifier_uuid".format( entry_id ) - nxmap.attrs["INVESTIGATION_UUID"] = "{}/collection_identifier".format(entry_id) - nxmap.attrs["ASSAY_UUID"] = "{}/experiment_identifier".format(entry_id) - nxmap.attrs["Protocol"] = "{}/experiment_documentation".format(entry_id) - nxmap.attrs["Citation"] = "{}/reference".format(entry_id) - nxmap.attrs["Substance"] = "{}/sample".format(entry_id) - nxmap.attrs["Parameters"] = ["instrument", "environment", "parameters"] - nxmap.attrs["EffectRecords"] = "datasets" + + # no need to repeat these, rather make a xml definition and refer to it + # nxmap.attrs["INVESTIGATION_UUID"] = "{}/collection_identifier".format(entry_id) + # nxmap.attrs["ASSAY_UUID"] = "{}/experiment_identifier".format(entry_id) + # nxmap.attrs["Protocol"] = "{}/experiment_documentation".format(entry_id) + # nxmap.attrs["Citation"] = "{}/reference".format(entry_id) + # nxmap.attrs["Substance"] = "{}/sample".format(entry_id) + # nxmap.attrs["Parameters"] = ["instrument", "environment", "parameters"] + # nxmap.attrs["EffectRecords"] = "datasets" try: citation_id = "{}/reference".format(entry_id) @@ -201,61 +276,42 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal nx_root[substance_id].attrs["uuid"] = papp.owner.substance.uuid nx_root["{}/sample/substance".format(entry_id)] = nx.NXlink(substance_id) - # parameters - if not ("{}/instrument".format(entry_id) in nx_root): - nx_root["{}/instrument".format(entry_id)] = nx.NXinstrument() - instrument = nx_root["{}/instrument".format(entry_id)] - - if not ("{}/parameters".format(entry_id) in nx_root): - nx_root["{}/parameters".format(entry_id)] = nx.NXcollection() - parameters = nx_root["{}/parameters".format(entry_id)] - - if not ("{}/environment".format(entry_id) in nx_root): - nx_root["{}/environment".format(entry_id)] = nx.NXenvironment() - environment = nx_root["{}/environment".format(entry_id)] - - if not (papp.parameters is None): - for prm in papp.parameters: + if papp.parameters is not None: + for prm_path in papp.parameters: try: - value = papp.parameters[prm] - # Invalid path if the key contains / - # prm = prm.replace("/","_") - target = environment - if "instrument" in prm.lower(): - target = instrument - if "technique" in prm.lower(): - target = instrument - if "wavelength" in prm.lower(): - target = instrument - elif "sample" in prm.lower(): - target = sample - elif "material" in prm.lower(): - target = sample - elif ("ASSAY" == prm.upper()) or ("E.METHOD" == prm.upper()): - target = nx_root[entry_id]["experiment_documentation"] - # continue - elif "E.SOP_REFERENCE" == prm: - # target = instrument - target = nx_root[entry_id]["experiment_documentation"] - elif "OPERATOR" == prm: - # target = instrument - target = nx_root[entry_id]["experiment_documentation"] - elif prm.startswith("T."): - target = instrument - - if "EXPERIMENT_END_DATE" == prm: - nx_root[entry_id]["end_time"] = value - elif "EXPERIMENT_START_DATE" == prm: - nx_root[entry_id]["start_time"] = value - elif "__input_file" == prm: - nx_root[entry_id]["experiment_documentation"][prm] = value - elif isinstance(value, str): - target[prm] = nx.NXfield(str(value)) + value = papp.parameters[prm_path] + prms = prm_path.split("/") + if len(prms) == 1: + prms 
= param_lookup(prm_path, value) + # print(prms,prms[:-1]) + _entry = nx_root[entry_id] + for _group in prms[:-1]: + if _group not in _entry: + if _group == "instrument": + _entry[_group] = nx.NXinstrument() + elif _group == "environment": + _entry[_group] = nx.NXenvironment() + elif _group == "parameters": + _entry[_group] = nx.NXcollection() + elif _group == "experiment_documentation": + _entry[_group] = nx.NXnote() + else: + _entry[_group] = nx.NXgroup() + _entry = _entry[_group] + target = _entry + prm = prms[-1] + + if isinstance(value, str): + target[prm] = nx.NXfield(value) + elif isinstance(value, int): + target[prm] = nx.NXfield(value) + elif isinstance(value, float): + target[prm] = nx.NXfield(value) elif isinstance(value, Value): # tbd ranges? target[prm] = nx.NXfield(value.loValue, unit=value.unit) else: - target = parameters + target[prm] = nx.NXfield(str(value)) except Exception as err: raise Exception( "ProtocolApplication: parameters parsing error {} {}".format( @@ -263,9 +319,9 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal ) ) from err - if not (papp.owner is None): + if papp.owner is not None: try: - sample["uuid"] = papp.owner.substance.uuid + sample.attrs["uuid"] = papp.owner.substance.uuid sample["provider"] = papp.owner.company.name except Exception as err: raise Exception( @@ -284,7 +340,8 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal # nx_root["/group_byexperiment"] = nx.NXgroup() # print(nx_root[entry_id].attrs) - # nx_root["/group_byexperiment{}".format(entry_id)] = nx.NXlink("{}/RAW_DATA".format(entry_id),abspath=True,soft=True) + # nx_root["/group_byexperiment{}".format(entry_id)] = nx.NXlink( + # "{}/RAW_DATA".format(entry_id),abspath=True,soft=True) # nx_root["/group_byexperiment/{}".format("xyz")] = nx.NXlink(substance_id) # nx.NXlink(nx_root[entry_id]) # nx_root[_categories_collection] = nx.NXlink(entry_id) @@ -292,7 +349,7 @@ def to_nexus(papp: ProtocolApplication, nx_root: nx.NXroot = None, hierarchy=Fal @add_ambitmodel_method(Study) -def to_nexus(study: Study, nx_root: nx.NXroot = None, hierarchy=False): +def to_nexus(study: Study, nx_root: nx.NXroot = None, hierarchy=False): # noqa: F811 if nx_root is None: nx_root = nx.NXroot() for papp in study.study: @@ -302,7 +359,9 @@ def to_nexus(study: Study, nx_root: nx.NXroot = None, hierarchy=False): @add_ambitmodel_method(SubstanceRecord) -def to_nexus(substance: SubstanceRecord, nx_root: nx.NXroot = None, hierarchy=False): +def to_nexus( # noqa: F811 + substance: SubstanceRecord, nx_root: nx.NXroot = None, hierarchy=False +): """ SubstanceRecord to nexus entry (NXentry) @@ -342,7 +401,7 @@ def to_nexus(substance: SubstanceRecord, nx_root: nx.NXroot = None, hierarchy=Fa print(substance.URI) print(err) nxroot.save("example.nxs",mode="w") - """ + """ # noqa: B950 if nx_root is None: nx_root = nx.NXroot() @@ -361,7 +420,8 @@ def to_nexus(substance: SubstanceRecord, nx_root: nx.NXroot = None, hierarchy=Fa if substance.composition is not None: for index, ce in enumerate(substance.composition): component = nx.NXsample_component() - # name='' cas='' einecs='' inchikey='YVZATJAPAZIWIL-UHFFFAOYSA-M' inchi='InChI=1S/H2O.Zn/h1H2;/q;+1/p-1' formula='HOZn' + # name='' cas='' einecs='' inchikey='YVZATJAPAZIWIL-UHFFFAOYSA-M' + # inchi='InChI=1S/H2O.Zn/h1H2;/q;+1/p-1' formula='HOZn' component.name = ce.component.compound.name component.einecs = ce.component.compound.einecs component.cas = ce.component.compound.cas @@ -384,7 +444,9 @@ def 
to_nexus(substance: SubstanceRecord, nx_root: nx.NXroot = None, hierarchy=Fa

 @add_ambitmodel_method(Substances)
-def to_nexus(substances: Substances, nx_root: nx.NXroot = None, hierarchy=False):
+def to_nexus(  # noqa: F811
+    substances: Substances, nx_root: nx.NXroot = None, hierarchy=False
+):
     if nx_root is None:
         nx_root = nx.NXroot()
     for substance in substances.substance:
@@ -393,7 +455,7 @@ def to_nexus(substances: Substances, nx_root: nx.NXroot = None, hierarchy=False)

 @add_ambitmodel_method(Composition)
-def to_nexus(composition: Composition, nx_root: nx.NXroot = None):
+def to_nexus(composition: Composition, nx_root: nx.NXroot = None):  # noqa: F811
     if nx_root is None:
         nx_root = nx.NXroot()

@@ -413,7 +475,8 @@ def is_alternate_axis(key: str, alt_axes: Dict[str, List[str]]) -> bool:

     Parameters:
     - key: The axis name to check.
-    - alt_axes: Dictionary where keys are primary axis names and values are lists of alternative axis names.
+    - alt_axes: Dictionary where keys are primary axis names and values are lists of
+      alternative axis names.

     Returns:
     - True if the key is an alternate axis, False otherwise.
@@ -442,39 +505,66 @@ def is_alternate_axis(key: str, alt_axes: Dict[str, List[str]]) -> bool:

         signal = nx.tree.NXfield(
             effect.signal.values,
-            name="value",
+            name=effect.endpoint,
             units=effect.signal.unit,
             long_name="{} {}".format(
                 effect.endpoint, "" if effect.signal.unit is None else effect.signal.unit
             ).strip(),
         )
+        if effect.signal.conditions is not None:
+            for key in effect.signal.conditions:
+                signal.attrs[key] = effect.signal.conditions[key]
+
+        nxdata = nx.tree.NXdata(
+            signal=signal,
+            axes=None if len(axes) == 0 else axes,
+            errors=effect.signal.errorValue,
+            # auxiliary_signals=None if len(aux_signals) < 1 else aux_signals,
+        )
         aux_signals = []
+
         if effect.signal.auxiliary:
             for a in effect.signal.auxiliary:
-                _tmp = effect.signal.auxiliary[a]
+                item = effect.signal.auxiliary[a]
+                if isinstance(item, (MetaValueArray, ValueArray)):
+                    _tmp = item.values
+                    _tmp_unit = item.unit
+                    _tmp_meta = item.conditions
+
+                elif isinstance(item, np.ndarray):
+                    _tmp = item
+                    _tmp_unit = effect.signal.unit
+                    _tmp_meta = None
+                else:
+                    continue
+
                 if _tmp.size > 0:
-                    aux_signals.append(
-                        nx.tree.NXfield(
+                    _auxname = a.replace("/", "_")
+                    long_name = "{} ({}) {}".format(
+                        effect.endpoint,
+                        a,
+                        "" if effect.signal.unit is None else effect.signal.unit,
+                    ).strip()
+                    if _auxname == "textValue":
+                        nxdata[_auxname] = nx.tree.NXfield(
                             _tmp,
-                            name=a.replace("/", "_"),
-                            units=effect.signal.unit,
-                            long_name="{} ({}) {}".format(
-                                effect.endpoint,
-                                a,
-                                "" if effect.signal.unit is None else effect.signal.unit,
-                            ).strip(),
+                            name=_auxname,
+                            units=_tmp_unit,
+                            long_name=long_name,
+                            dtype=string_dtype(encoding="utf-8"),
                         )
-                    )
-        # print(a,aux_signal)
-        # print(effect.endpoint,aux_signals,len(aux_signals))
-        # print(">>>",effect.endpoint,effect.signal.values)
-        # aux_signals = []
-        nxdata = nx.tree.NXdata(
-            signal=signal,
-            axes=None if len(axes) == 0 else axes,
-            errors=effect.signal.errorValue,
-            auxiliary_signals=None if len(aux_signals) < 1 else aux_signals,
-        )
+                    else:
+                        nxdata[_auxname] = nx.tree.NXfield(
+                            _tmp, name=_auxname, units=_tmp_unit, long_name=long_name
+                        )
+
+                    if _tmp_meta is not None:
+                        for key in _tmp_meta:
+                            nxdata[_auxname].attrs[key] = _tmp_meta[key]
+                    aux_signals.append(_auxname)
+
+        if len(aux_signals) > 0:
+            nxdata.attrs["auxiliary_signals"] = aux_signals
         if effect.conditions:
             for key in effect.conditions:
                 nxdata.attrs[key] = effect.conditions[key]
@@ 
-498,6 +588,7 @@ def is_alternate_axis(key: str, alt_axes: Dict[str, List[str]]) -> bool: nxdata.attrs["interpretation"] = ( "scalar" if index == 0 else ("spectrum" if index == 1 else "image") ) + nxdata.title = effect.nx_name return nxdata @@ -511,7 +602,7 @@ def process_pa(pa: ProtocolApplication, entry=None, nx_root: nx.NXroot = None): _path = "/substance/{}".format(pa.owner.substance.uuid) # print(_path, nx_root[_path].name) substance_name = nx_root[_path].name - except BaseException: + except BaseException: # noqa: B036 FIXME substance_name = "" effectarrays_only, df = pa.convert_effectrecords2array() @@ -533,7 +624,14 @@ def process_pa(pa: ProtocolApplication, entry=None, nx_root: nx.NXroot = None): entry[_group_key]["description"] = effect.endpointtype # entry[_group_key] = _endpointtype_groups[_group_key] - entryid = "{}_{}".format(effect.endpoint, index) + entryid = "{}_{}".format( + ( + effect.endpoint + if effect.nx_name is None + else effect.nx_name.replace("/", "_") + ), + index, + ) if entryid in entry[_group_key]: del entry[_group_key][entryid] print("replacing {}/{}".format(_group_key, entryid)) @@ -543,9 +641,15 @@ def process_pa(pa: ProtocolApplication, entry=None, nx_root: nx.NXroot = None): entry[_group_key][entryid] = nxdata if _default is None: entry.attrs["default"] = _group_key - nxdata.title = "{} (by {}) {}".format( - effect.endpoint, pa.citation.owner, substance_name - ) + + if nxdata.title is None: + nxdata.title = ( + "{} (by {}) {}".format( + effect.endpoint, pa.citation.owner, substance_name + ) + if pa.nx_name is None + else pa.nx_name + ) return entry diff --git a/src/pyambit/solr_writer.py b/src/pyambit/solr_writer.py new file mode 100644 index 0000000..2bfc993 --- /dev/null +++ b/src/pyambit/solr_writer.py @@ -0,0 +1,176 @@ +import json +from typing import Dict, Union + +from pyambit.datamodel import ( + EffectArray, + EffectRecord, + EffectResult, + ProtocolApplication, + SubstanceRecord, + Substances, + Value, +) + + +class Ambit2Solr: + + def __init__(self, prefix: str): + self.prefix = prefix + + def __enter__(self): + self._solr = [] + return self + + def __exit__(self, exc_type, exc_value, traceback): + # Any cleanup code, if needed + pass + + def prm2solr(self, params: Dict, key: str, value: Union[str, Value, None]): + if isinstance(value, str): + params["{}_s".format(key)] = value + elif isinstance(value, int): + params["{}_d".format(key)] = value + elif isinstance(value, float): + params["{}_d".format(key)] = value + elif isinstance(value, Value): + if value.loValue is not None: + params["{}_d".format(key)] = value.loValue + if value.unit is not None: + params["{}_UNIT_s".format(key)] = value.unit + + def effectresult2solr(self, effect_result: EffectResult, solr_index=None): + if solr_index is None: + solr_index = {} + if effect_result.loValue is not None: + solr_index["loValue_d"] = effect_result.loValue + if effect_result.loQualifier is not None: + solr_index["loQualifier_s"] = effect_result.loQualifier + if effect_result.upQualifier is not None: + solr_index["upQualifier_s"] = effect_result.upQualifier + if effect_result.upValue is not None: + solr_index["upValue_d"] = effect_result.upValue + if effect_result.unit is not None: + solr_index["unit_s"] = effect_result.unit + if effect_result.textValue is not None: + solr_index["textValue_s"] = effect_result.textValue + + def effectrecord2solr(self, effect: EffectRecord, solr_index=None): + if solr_index is None: + solr_index = {} + if isinstance(effect, EffectArray): + # tbd - this is new in 
diff --git a/src/pyambit/solr_writer.py b/src/pyambit/solr_writer.py
new file mode 100644
index 0000000..2bfc993
--- /dev/null
+++ b/src/pyambit/solr_writer.py
@@ -0,0 +1,176 @@
+import json
+from typing import Dict, Union
+
+from pyambit.datamodel import (
+    EffectArray,
+    EffectRecord,
+    EffectResult,
+    ProtocolApplication,
+    SubstanceRecord,
+    Substances,
+    Value,
+)
+
+
+class Ambit2Solr:
+
+    def __init__(self, prefix: str):
+        self.prefix = prefix
+
+    def __enter__(self):
+        self._solr = []
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Any cleanup code, if needed
+        pass
+
+    def prm2solr(self, params: Dict, key: str, value: Union[str, int, float, Value, None]):
+        if isinstance(value, str):
+            params["{}_s".format(key)] = value
+        elif isinstance(value, int):
+            params["{}_d".format(key)] = value
+        elif isinstance(value, float):
+            params["{}_d".format(key)] = value
+        elif isinstance(value, Value):
+            if value.loValue is not None:
+                params["{}_d".format(key)] = value.loValue
+            if value.unit is not None:
+                params["{}_UNIT_s".format(key)] = value.unit
+
+    def effectresult2solr(self, effect_result: EffectResult, solr_index=None):
+        if solr_index is None:
+            solr_index = {}
+        if effect_result.loValue is not None:
+            solr_index["loValue_d"] = effect_result.loValue
+        if effect_result.loQualifier is not None:
+            solr_index["loQualifier_s"] = effect_result.loQualifier
+        if effect_result.upQualifier is not None:
+            solr_index["upQualifier_s"] = effect_result.upQualifier
+        if effect_result.upValue is not None:
+            solr_index["upValue_d"] = effect_result.upValue
+        if effect_result.unit is not None:
+            solr_index["unit_s"] = effect_result.unit
+        if effect_result.textValue is not None:
+            solr_index["textValue_s"] = effect_result.textValue
+
+    def effectrecord2solr(self, effect: EffectRecord, solr_index=None):
+        if solr_index is None:
+            solr_index = {}
+        if isinstance(effect, EffectArray):
+            # tbd - this is new in pyambit; array results were not implemented before
+            if effect.result is not None:  # EffectResult
+                self.effectresult2solr(effect.result, solr_index)
+            # e.g. vector search
+            if effect.endpointtype == "embeddings":
+                solr_index[effect.endpoint] = effect.signal.values.tolist()
+        elif isinstance(effect, EffectRecord):
+            # conditions
+            if effect.result is not None:  # EffectResult
+                self.effectresult2solr(effect.result, solr_index)
+
+    def entry2solr(self, papp: ProtocolApplication):
+        papp_solr = []
+        for _id, effect in enumerate(papp.effects, start=1):
+            _solr = {}
+            _solr["id"] = "{}/{}".format(papp.uuid, _id)
+            _solr["investigation_uuid_s"] = papp.investigation_uuid
+            _solr["assay_uuid_s"] = papp.assay_uuid
+            _solr["type_s"] = "study"
+            _solr["document_uuid_s"] = papp.uuid
+
+            _solr["topcategory_s"] = papp.protocol.topcategory
+            _solr["endpointcategory_s"] = (
+                "UNKNOWN"
+                if papp.protocol.category is None
+                else papp.protocol.category.code
+            )
+            _solr["guidance_s"] = papp.protocol.guideline
+            # _solr["guidance_synonym_ss"] = ["FIX_0000058"]
+            # _solr["E.method_synonym_ss"] = ["FIX_0000058"]
+            _solr["endpoint_s"] = papp.protocol.endpoint
+            _solr["effectendpoint_s"] = effect.endpoint
+            _solr["effectendpoint_type_s"] = effect.endpointtype
+            # _solr["effectendpoint_synonym_ss"] = ["CHMO_0000823"]
+            _solr["reference_owner_s"] = papp.citation.owner
+            _solr["reference_year_s"] = papp.citation.year
+            _solr["reference_s"] = papp.citation.title
+            _solr["updated_s"] = papp.updated
+            if "E.method_s" in papp.parameters:
+                _solr["E.method_s"] = papp.parameters["E.method_s"]
+            self.effectrecord2solr(effect, _solr)
+
+            _conditions = {"type_s": "conditions"}
+            _conditions["topcategory_s"] = papp.protocol.topcategory
+            _conditions["endpointcategory_s"] = (
+                "UNKNOWN"
+                if papp.protocol.category is None
+                else papp.protocol.category.code
+            )
+            _conditions["document_uuid_s"] = papp.uuid
+            _conditions["id"] = "{}/cn".format(_solr["id"])
+            for prm in effect.conditions:
+                self.prm2solr(_conditions, prm, effect.conditions[prm])
+            _solr["_childDocuments_"] = [_conditions]
+
+            _params = {}
+            for prm in papp.parameters:
+                self.prm2solr(_params, prm, papp.parameters[prm])
+            _params["document_uuid_s"] = papp.uuid
+            _params["id"] = "{}/prm".format(papp.uuid)
+            _params["topcategory_s"] = papp.protocol.topcategory
+            _params["endpointcategory_s"] = (
+                "UNKNOWN"
+                if papp.protocol.category is None
+                else papp.protocol.category.code
+            )
+            if "E.method_s" in papp.parameters:
+                _params["E.method_s"] = papp.parameters["E.method_s"]
+            _params["type_s"] = "params"
+            _solr["_childDocuments_"].append(_params)
+            papp_solr.append(_solr)
+        return papp_solr
+
+    def substancerecord2solr(self, substance: SubstanceRecord):
+        _solr = {}
+        _solr["content_hss"] = []
+        _solr["dbtag_hss"] = self.prefix
+        _solr["name_hs"] = substance.name
+        _solr["publicname_hs"] = substance.publicname
+        _solr["owner_name_hs"] = substance.ownerName
+        _solr["substanceType_hs"] = substance.substanceType
+        _solr["type_s"] = "substance"
+        _solr["s_uuid_hs"] = substance.i5uuid
+        _solr["id"] = substance.i5uuid
+        _studies = []
+        _solr["SUMMARY.RESULTS_hss"] = []
+        for _papp in substance.study:
+            _study_solr = self.entry2solr(_papp)
+            for _study in _study_solr:
+                _study["s_uuid_s"] = substance.i5uuid
+                _study["type_s"] = "study"
+                _study["name_s"] = substance.name
+                _study["publicname_s"] = substance.publicname
+                _study["substanceType_s"] = substance.substanceType
+                _study["owner_name_s"] = substance.ownerName
+            _studies.extend(_study_solr)
+        _solr["_childDocuments_"] = _studies
+        _solr["SUMMARY.REFS_hss"] = []
+        _solr["SUMMARY.REFOWNERS_hss"] = []
+
+        return _solr
+
+    def substances2solr(self, substances: Substances, buffer=None):
+        if buffer is None:
+            buffer = []
+        for substance in substances.substance:
+            buffer.append(self.substancerecord2solr(substance))
+        return buffer
+
+    def to_json(self, substances: Substances):
+        return self.substances2solr(substances)
+
+    def write(self, substances: Substances, file_path):
+        _json = self.to_json(substances)
+        with open(file_path, "w") as file:
+            json.dump(_json, file)
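For orientation, a minimal usage sketch for the new writer; the prefix and output path are illustrative, and `substances` is assumed to be a populated Substances instance (e.g. built from AMBIT JSON, as in the tests below):

    from pyambit.solr_writer import Ambit2Solr

    # substances: Substances = ...  # e.g. Substances(**json_dict)
    writer = Ambit2Solr(prefix="TEST")
    docs = writer.to_json(substances)  # list of Solr docs with nested _childDocuments_
    writer.write(substances, "substances.json")  # same payload, dumped to a file

The class also defines __enter__/__exit__, so it can be used as a context manager, although nothing in the current implementation requires it.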
diff --git a/tests/pyambit/datamodel/datamodel_test.py b/tests/pyambit/datamodel/datamodel_test.py
index 570bc78..2cf5569 100644
--- a/tests/pyambit/datamodel/datamodel_test.py
+++ b/tests/pyambit/datamodel/datamodel_test.py
@@ -5,7 +5,7 @@
 import numpy as np
 import numpy.typing as npt
 import pyambit.datamodel as mb
-from pydantic_core import from_json
+
 
 TEST_DIR = Path(__file__).parent.parent / "resources"
 
@@ -24,14 +24,57 @@ def test_substances_load():
     assert substances == new_val
 
 
+def test_basevaluearray_roundtrip():
+    """
+    Test the roundtrip serialization and deserialization of the BaseValueArray model.
+    """
+    a1: npt.NDArray[np.float64] = np.ones(5)
+    a0: npt.NDArray[np.float64] = np.zeros(5)
+    val = mb.BaseValueArray(values=a1, unit="unit", errQualifier="SD", errorValue=a0)
+
+    data = json.loads(val.model_dump_json())
+    # print(data)
+    new_val = mb.BaseValueArray.model_construct(**data)
+
+    assert val == new_val
+
+
+def test_metavaluearray_roundtrip():
+    """
+    Test the roundtrip serialization and deserialization of the MetaValueArray model.
+    """
+    a1: npt.NDArray[np.float64] = np.ones(5)
+    a0: npt.NDArray[np.float64] = np.zeros(5)
+    val = mb.MetaValueArray(
+        values=a1,
+        unit="unit",
+        errQualifier="SD",
+        errorValue=a0,
+        conditions={"test": "test"},
+    )
+
+    data = json.loads(val.model_dump_json())
+    # print(data)
+    new_val = mb.MetaValueArray.model_construct(**data)
+
+    assert val == new_val
+
+
 def test_valuearray_roundtrip():
     """
     Test the roundtrip serialization and deserialization of the ValueArray model.
     """
     a1: npt.NDArray[np.float64] = np.ones(5)
     a0: npt.NDArray[np.float64] = np.zeros(5)
-    val = mb.ValueArray(values=a1, unit="unit", errQualifier="SD", errorValue=a0)
+    val = mb.ValueArray(
+        values=a1,
+        unit="unit",
+        errQualifier="SD",
+        errorValue=a0,
+        conditions={"test": "test"},
+    )
+    assert val.conditions is not None
 
     data = json.loads(val.model_dump_json())
     new_val = mb.ValueArray.model_construct(**data)
@@ -42,7 +85,7 @@ def test_valuearrayaux_roundtrip():
     """
     Test the roundtrip serialization and deserialization of the ValueArray model.
     """
-    shape = tuple((10, 2, 1))
+    shape = tuple((3, 2, 1))
     matrix_vals = np.random.random(shape) * 3
     matrix_errs = np.random.random(shape)
     matrix_upValue = np.random.random(shape) * 5
@@ -58,7 +101,10 @@
 
     data = json.loads(val.model_dump_json())
     new_val = mb.ValueArray.model_construct(**data)
-
+    for key in val.auxiliary:
+        print("old", key, type(val.auxiliary[key]))
+    for key in new_val.auxiliary:
+        print("new", key, type(new_val.auxiliary[key]))
     assert val == new_val
 
@@ -81,6 +127,29 @@ def test_valuearray_roundtrip_withaux():
     assert val == new_val
 
 
+def test_valuearray_roundtrip_with_arrayaux():
+    """
+    Test the roundtrip serialization and deserialization of the ValueArray model
+    with an array-valued auxiliary.
+    """
+
+    b1: npt.NDArray[np.float64] = np.ones(10)
+    aux = mb.MetaValueArray(values=b1, unit="bunit")
+
+    a1: npt.NDArray[np.float64] = np.ones(5)
+    a0: npt.NDArray[np.float64] = np.zeros(5)
+    val = mb.ValueArray(
+        values=a1,
+        unit="unit",
+        errQualifier="SD",
+        errorValue=a0,
+        auxiliary={"upValue": a1, "array": aux},
+    )
+
+    data = json.loads(val.model_dump_json())
+    new_val = mb.ValueArray.model_construct(**data)
+    assert val == new_val
+
+
 def test_value_roundtrip():
     """
     Test the roundtrip serialization and deserialization of the Value model.
@@ -216,7 +285,8 @@ def test_effect_array_roundtrip():
 
 def test_protocol_effect_record_roundtrip():
     """
-    Test the roundtrip serialization and deserialization of the ProtocolEffectRecord model.
+    Test the roundtrip serialization and deserialization of the ProtocolEffectRecord
+    model.
     """
     protocol = mb.Protocol(
         topcategory="TOX",
@@ -385,7 +455,8 @@ def create_effectrecord():
 
 def test_protocol_application_roundtrip():
     """
-    Test the roundtrip serialization and deserialization of the ProtocolApplication model.
+    Test the roundtrip serialization and deserialization of the ProtocolApplication
+    model.
     """
 
     original = create_protocolapp4test()
@@ -425,7 +496,8 @@ def test_study_roundtrip():
 
 def test_component_proportion_roundtrip():
     """
-    Test the roundtrip serialization and deserialization of the ComponentProportion model.
+    Test the roundtrip serialization and deserialization of the ComponentProportion
+    model.
     """
 
     typical = mb.TypicalProportion(precision="<", value=5.0, unit="g")
@@ -527,7 +599,7 @@ def test_composition_roundtrip():
     see how features are expected
     https://apps.ideaconsult.net/gracious/compound/3?media=application/json&feature_uris=https://apps.ideaconsult.net/gracious/compound/3/feature
-    """
+    """  # noqa: B950
     # Create sample data for Composition
     original = mb.Composition(
         composition=[
diff --git a/tests/pyambit/datamodel/nexus_writer_test.py b/tests/pyambit/datamodel/nexus_writer_test.py
index 7e67b93..419d866 100644
--- a/tests/pyambit/datamodel/nexus_writer_test.py
+++ b/tests/pyambit/datamodel/nexus_writer_test.py
@@ -7,7 +7,7 @@
 import pytest
 
 # to_nexus is not added without this import
-from pyambit import nexus_writer
+from pyambit import nexus_writer  # noqa: F401
 
 from pyambit.datamodel import Study, Substances
 
@@ -31,6 +31,20 @@ def substances():
     return substances
 
 
+def inspect_nexus_tree(node, path="root"):
+    if isinstance(node, dict):  # If the node is a group/dictionary
+        for key, child in node.items():
+            inspect_nexus_tree(child, path + f"/{key}")
+    elif hasattr(node, "dtype"):
+        # Check if dtype is Unicode
+        if node.dtype.char == "U":
+            print(
+                f"*****Problematic Unicode data found at {path} with dtype {node.dtype}"
+            )
+        # else:
+        #     print(f"Skipping non-data node at {path}")
+
+
 def test_substances(substances):
     # nxroot = nx.NXroot()
 
@@ -38,20 +52,31 @@ def test_substances(substances):
     substances.to_nexus(nxroot, hierarchy=True)
     file = os.path.join(tempfile.gettempdir(), "substances.nxs")
     print(file)
+    inspect_nexus_tree(nxroot)
     nxroot.save(file, mode="w")
 
 
 def test_study(substances):
     for substance in substances.substance:
         for study in substance.study:
+
+            study.nx_name = "test"
             file = os.path.join(
                 tempfile.gettempdir(), "study_{}.nxs".format(study.uuid)
             )
-            print(file)
             nxroot = nx.NXroot()
             try:
-                study.to_nexus(nxroot)
+                study.to_nexus(nxroot, hierarchy=True)
+                inspect_nexus_tree(nxroot)
                 nxroot.save(file, mode="w")
             except Exception as err:
-                print(study)
+                # inspect_nexus_tree(nxroot)
+                # print(study.model_dump_json(exclude_none=True))
+                effectarrays_only, df = study.convert_effectrecords2array()
+                df.dropna(how="all").to_excel("bad.xlsx")
+                for effect in effectarrays_only:
+                    for key in effect.signal.auxiliary:
+                        for element in effect.signal.auxiliary[key].flat:
+                            print(element, end=".")
+                # print(nxroot.tree)
                 raise err
diff --git a/tests/pyambit/datamodel/solr_writer_test.py b/tests/pyambit/datamodel/solr_writer_test.py
new file mode 100644
index 0000000..e5ec276
--- /dev/null
+++ b/tests/pyambit/datamodel/solr_writer_test.py
@@ -0,0 +1,40 @@
+import json
+import os.path
+import tempfile
+from pathlib import Path
+
+import pytest
+from pyambit.datamodel import Study, Substances
+
+from pyambit.solr_writer import Ambit2Solr
+
+
+TEST_DIR = Path(__file__).parent.parent / "resources"
+
+
+@pytest.fixture(scope="module")
+def substances():
+    """
+    Fixture to load and return the Substances object.
+    """
+
+    with open(os.path.join(TEST_DIR, "substance.json"), "r", encoding="utf-8") as file:
+        json_substance = json.load(file)
+        substances = Substances(**json_substance)
+
+    with open(os.path.join(TEST_DIR, "study.json"), "r", encoding="utf-8") as file:
+        json_study = json.load(file)
+        study = Study(**json_study)
+        substances.substance[0].study = study.study
+    return substances
+
+
+def test_substances(substances):
+
+    _substances = Substances(substance=[substances.substance[0]])
+    writer: Ambit2Solr = Ambit2Solr(prefix="TEST")
+    _json = writer.to_json(_substances)
+    _file = os.path.join(tempfile.gettempdir(), "substances.json")
+    print(_file)
+    with open(_file, "w") as file:
+        json.dump(_json, file)
diff --git a/tests/pyambit/datamodel/spectra_writer_test.py b/tests/pyambit/datamodel/spectra_writer_test.py
index 8e5106c..38c7f93 100644
--- a/tests/pyambit/datamodel/spectra_writer_test.py
+++ b/tests/pyambit/datamodel/spectra_writer_test.py
@@ -4,7 +4,7 @@
 import nexusformat.nexus.tree as nx
 import numpy as np
 
-from pyambit.ambit_deco import add_ambitmodel_method
+from pyambit.ambit_deco import add_ambitmodel_method  # noqa: F401
 from pyambit.datamodel import SubstanceRecord, Substances
 from pyambit.nexus_spectra import spe2ambit
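A note on the `# noqa: F401` imports above: they are kept for their side effects. Importing `pyambit.nexus_writer` (or `pyambit.ambit_deco`) executes the `@add_ambitmodel_method(...)` decorators, which is what attaches `to_nexus` to the datamodel classes; hence the "to_nexus is not added without this import" comments in the tests. A simplified sketch of that registration pattern (an assumption about `ambit_deco`'s internals, not its actual source):

    def add_ambitmodel_method(cls):
        # Attach the decorated function to `cls` under the function's own name,
        # so instances of the model class gain it as a bound method.
        def decorator(func):
            setattr(cls, func.__name__, func)
            return func
        return decorator

This registration-at-import-time pattern also explains the `# noqa: F811` markers in nexus_writer.py: each `to_nexus` definition deliberately reuses the same name, because the decorator immediately attaches it to a different class.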