From d008aa78601376a38b116fbdf36e8e90b55dd539 Mon Sep 17 00:00:00 2001
From: Lan Le <lan.le@kit.edu>
Date: Wed, 11 Oct 2023 13:08:44 +0200
Subject: [PATCH] refactor: update to read parsed jdx file

---
 .github/workflows/unit_test.yml               |   4 +-
 INSTALL.md                                    |   4 +-
 chem_spectra/lib/converter/jcamp/base.py      |   5 +-
 .../lib/converter/jcamp/data_parse.py         |  21 ++-
 environment.yml                               | 146 ++++++++++--------
 requirements.txt                              |  18 +--
 tests/lib/converter/jcamp/test_data_parse.py  |  26 ++++
 tests/test_bagit.py                           |   1 -
 8 files changed, 145 insertions(+), 80 deletions(-)
 create mode 100644 tests/lib/converter/jcamp/test_data_parse.py

diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml
index c17134f5..53b24486 100644
--- a/.github/workflows/unit_test.yml
+++ b/.github/workflows/unit_test.yml
@@ -8,10 +8,10 @@ jobs:
 
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 3.7
+    - name: Set up Python 3.9
       uses: actions/setup-python@v4
       with:
-        python-version: 3.7
+        python-version: 3.9
     - name: Install dependencies
       run: |
         $CONDA/bin/conda env update --file environment.yml --name base
diff --git a/INSTALL.md b/INSTALL.md
index d24b62b3..b8e61f3a 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -17,12 +17,12 @@ However, it is highly recommended to refer to official websites.
 _Logout & login to load installations._
 
 ```
-$ conda create --name chem-spectra python=3.7
+$ conda create --name chem-spectra python=3.9
 $ source activate chem-spectra
 ```
 
 ```
-$ conda install -c rdkit rdkit=2020.09.1.0
+$ conda install -c conda-forge rdkit=2022.03.2
 ```
 
 ```
diff --git a/chem_spectra/lib/converter/jcamp/base.py b/chem_spectra/lib/converter/jcamp/base.py
index d6351176..403498cf 100644
--- a/chem_spectra/lib/converter/jcamp/base.py
+++ b/chem_spectra/lib/converter/jcamp/base.py
@@ -2,6 +2,7 @@
 import json
 
 from chem_spectra.lib.converter.share import parse_params, parse_solvent
+from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data
 import os
 
 data_type_json = os.path.join(os.path.dirname(__file__), 'data_type.json')
@@ -43,7 +44,9 @@ def __init__(self, path, params=False):
         self.__read_solvent()
 
     def __read(self, path):
-        return ng.jcampdx.read(path, show_all_data=True, read_err='ignore')
+        # read_parsed_jdx_data unwraps a nested '_datatype_' header if present.
+        raw = ng.jcampdx.read(path, show_all_data=True, read_err='ignore')
+        return read_parsed_jdx_data(raw)
 
     def __set_datatype(self):
         dts = self.datatypes
diff --git a/chem_spectra/lib/converter/jcamp/data_parse.py b/chem_spectra/lib/converter/jcamp/data_parse.py
index c6ef52d7..84bee5e0 100644
--- a/chem_spectra/lib/converter/jcamp/data_parse.py
+++ b/chem_spectra/lib/converter/jcamp/data_parse.py
@@ -51,8 +51,6 @@ def make_ni_data_xs(base):
 
     return None
 
-    
-
 
 def make_ni_data_xs(base):
     if base.data_format and (base.data_format == '(XY..XY)'):
@@ -67,11 +65,26 @@ def make_ni_data_xs(base):
 
 
 def make_ms_data_xsys(base):
-    if base.data is None and base.dic['XYPOINTS']:
-        base.data = [__parse_xy_points(base)]
+    if base.data is None:
+        if 'XYPOINTS' in base.dic and base.dic['XYPOINTS']:
+            base.data = [__parse_xy_points(base)]
+        elif 'DATATABLE' in base.dic and base.dic['DATATABLE']:
+            base.data_format = '(XY..XY)'
+            base.data = [__parse_xy_points(base)]
 
     # base.data type is dict
     if isinstance(base.data, dict):
         return base.data['real']
 
     return base.data
+
+def read_parsed_jdx_data(parsed_data):
+    """Return (header, data); unwrap a nested '_datatype_' header if present."""
+    dic, data = parsed_data
+    for key, blocks in dic.items():
+        # Nested layout: a '_datatype_*' key holds a list of header dicts and
+        # the first one is used. An empty list is skipped (was an IndexError).
+        if '_datatype_' in key and len(blocks) > 0:
+            return blocks[0], data
+    # Flat layout: hand back a shallow copy, as the original loop built.
+    return dict(dic), data
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 375bdf54..d6a04326 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,110 +1,134 @@
 name: chem-spectra-dev
 channels:
-  - rdkit
+  - conda-forge
   - defaults
 dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=5.1=1_gnu
-  - blas=1.0=mkl
-  - bottleneck=1.3.5=py37h7deecbd_0
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2023.01.10=h06a4308_0
-  - cairo=1.16.0=hb05425b_4
-  - certifi=2022.12.7=py37h06a4308_0
-  - expat=2.4.9=h6a678d5_0
-  - fontconfig=2.14.1=h4c34cd2_2
-  - freetype=2.12.1=h4a9f257_0
-  - giflib=5.2.1=h5eee18b_3
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_kmp_llvm
+  - blas=2.119=openblas
+  - blas-devel=3.9.0=19_linux64_openblas
+  - boost=1.74.0=py39h5472131_5
+  - boost-cpp=1.74.0=h75c5d50_8
+  - bottleneck=1.3.7=py39h389d5f1_0
+  - bzip2=1.0.8=h7f98852_4
+  - ca-certificates=2023.7.22=hbcca054_0
+  - cairo=1.16.0=hb05425b_5
+  - expat=2.5.0=hcb278e6_1
+  - fontconfig=2.14.1=h52c9d5c_1
+  - freetype=2.10.4=hca18f0e_2
+  - giflib=5.2.1=h0b41bf4_3
   - glib=2.69.1=he621ea3_2
-  - icu=58.2=he6710b0_3
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9e=h5eee18b_1
-  - lcms2=2.12=h3be6417_0
+  - greenlet=2.0.1=py39h6a678d5_0
+  - icu=70.1=h27087fc_0
+  - jpeg=9e=h0b41bf4_3
+  - lcms2=2.15=hfd0df8a_0
   - ld_impl_linux-64=2.38=h1181459_1
   - lerc=3.0=h295c915_0
-  - libboost=1.73.0=h28710b8_12
-  - libdeflate=1.17=h5eee18b_0
-  - libffi=3.4.2=h6a678d5_6
-  - libgcc-ng=11.2.0=h1234567_1
-  - libgomp=11.2.0=h1234567_1
-  - libpng=1.6.39=h5eee18b_0
+  - libblas=3.9.0=19_linux64_openblas
+  - libcblas=3.9.0=19_linux64_openblas
+  - libdeflate=1.17=h5eee18b_1
+  - libexpat=2.5.0=hcb278e6_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=13.2.0=h807b86a_2
+  - libgfortran-ng=13.2.0=h69a702a_2
+  - libgfortran5=13.2.0=ha4646dd_2
+  - libiconv=1.17=h166bdaf_0
+  - liblapack=3.9.0=19_linux64_openblas
+  - liblapacke=3.9.0=19_linux64_openblas
+  - libopenblas=0.3.24=pthreads_h413a1c8_0
+  - libpng=1.6.39=h753d276_0
   - libstdcxx-ng=11.2.0=h1234567_1
-  - libtiff=4.5.0=h6a678d5_2
+  - libtiff=4.5.1=h6a678d5_0
   - libuuid=1.41.5=h5eee18b_0
-  - libwebp=1.2.4=h11a3e52_1
-  - libwebp-base=1.2.4=h5eee18b_1
-  - libxcb=1.15=h7f8727e_0
-  - libxml2=2.10.3=hcbfbd50_0
+  - libwebp=1.3.2=h11a3e52_0
+  - libwebp-base=1.3.2=hd590300_0
+  - libxcb=1.16=hd590300_0
+  - libxml2=2.9.14=h22db469_4
+  - libzlib=1.2.13=hd590300_5
+  - llvm-openmp=17.0.3=h4dfa4b3_0
   - lz4-c=1.9.4=h6a678d5_0
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
+  - matplotlib-base=3.4.3=py39h2fa2bec_2
   - ncurses=6.4=h6a678d5_0
-  - numexpr=2.8.4=py37he184ba9_0
-  - numpy=1.21.5=py37h6c91a56_3
-  - numpy-base=1.21.5=py37ha15fc14_3
-  - openssl=1.1.1t=h7f8727e_0
-  - packaging=22.0=py37h06a4308_0
-  - pcre=8.45=h295c915_0
-  - pip=22.3.1=py37h06a4308_0
+  - numexpr=2.8.7=py39h286c3b5_0
+  - openblas=0.3.24=pthreads_h7a3da1a_0
+  - openssl=3.1.4=hd590300_0
+  - pcre=8.45=h9c3ff4c_0
+  - pip=23.2.1=py39h06a4308_0
   - pixman=0.40.0=h7f8727e_1
-  - py-boost=1.73.0=py37h51133e4_12
-  - python=3.7.16=h7a1cb2a_0
-  - rdkit=2020.09.1.0=py37hd50e099_1
+  - pthread-stubs=0.4=h36c2ea0_1001
+  - pycairo=1.23.0=py39hd1222b9_0
+  - python=3.9.18=h955ad1f_0
+  - python-dateutil=2.8.2=pyhd8ed1ab_0
+  - python_abi=3.9=2_cp39
+  - pytz=2023.3.post1=pyhd8ed1ab_0
+  - rdkit=2022.03.2=py39h89e00b9_0
   - readline=8.2=h5eee18b_0
-  - setuptools=65.6.3=py37h06a4308_0
-  - sqlite=3.41.1=h5eee18b_0
+  - reportlab=3.6.12=py39h5eee18b_0
+  - setuptools=68.0.0=py39h06a4308_0
+  - sqlalchemy=2.0.22=py39hd1e30aa_0
+  - sqlite=3.41.2=h5eee18b_0
   - tk=8.6.12=h1ccaba5_0
-  - wheel=0.38.4=py37h06a4308_0
-  - xz=5.2.10=h5eee18b_1
-  - zlib=1.2.13=h5eee18b_0
-  - zstd=1.5.2=ha4553b6_0
+  - tornado=6.3.3=py39hd1e30aa_1
+  - typing-extensions=4.8.0=hd8ed1ab_0
+  - typing_extensions=4.8.0=pyha770c72_0
+  - wheel=0.41.2=py39h06a4308_0
+  - xorg-libxau=1.0.11=hd590300_0
+  - xorg-libxdmcp=1.1.3=h7f98852_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=hd590300_5
+  - zstd=1.5.5=hc292b87_0
   - pip:
       - atomicwrites==1.3.0
       - attrs==19.1.0
-      - cftime==1.6.2
+      - certifi==2022.12.7
+      - cftime==1.6.3
       - chardet==3.0.4
       - click==8.0.0
+      - contourpy==1.1.1
       - coverage==4.5.3
       - cycler==0.10.0
-      - datetime==5.1
       - entrypoints==0.3
+      - exceptiongroup==1.1.3
       - flake8==3.7.7
       - flask==2.2.5
+      - flask-jwt-extended==4.5.2
+      - fonttools==4.43.1
       - gunicorn==19.9.0
       - idna==2.7
       - importlib-metadata==3.6.0
+      - importlib-resources==5.1.3
+      - iniconfig==2.0.0
       - itsdangerous==2.0.0
       - jinja2==3.1.2
-      - kiwisolver==1.4.4
+      - kiwisolver==1.4.5
       - markupsafe==2.1.2
-      - matplotlib==3.0.0
+      - matplotlib==3.8.0
       - mccabe==0.6.1
       - more-itertools==7.2.0
-      - netcdf4==1.5.3
+      - netcdf4==1.6.4
+      - numpy==1.26.0
       - olefile==0.46
-      - pandas==0.23.4
+      - packaging==23.2
+      - pandas==2.1.1
       - pathlib2==2.3.4
       - pillow==9.3.0
       - pluggy==0.12.0
       - py==1.11.0
       - pycodestyle==2.5.0
       - pyflakes==2.1.1
+      - pyjwt==2.8.0
       - pymzml==2.5.2
       - pyopenms==2.6.0
       - pyparsing==2.4.2
-      - pytest==4.0.0
-      - python-dateutil==2.7.3
-      - pytz==2019.1
+      - pytest==7.4.2
       - regex==2019.4.9
       - requests==2.25.0
-      - scipy==1.2.0
+      - scipy==1.10.0
       - six==1.11.0
-      - typing-extensions==4.5.0
+      - tomli==2.0.1
+      - tzdata==2023.3
       - urllib3==1.26.5
       - werkzeug==2.2.3
       - zipp==0.5.2
-      - zope-interface==6.0
-      - -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue
+      - -e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue
 prefix: /home/eln/anaconda3/envs/chem-spectra-dev
diff --git a/requirements.txt b/requirements.txt
index 91f3c315..e8a748fa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,15 +14,15 @@ importlib-metadata==3.6.0
 itsdangerous==2.0
 Jinja2==3.1.2
 MarkupSafe==2.1.2
-matplotlib==3.0.0
+matplotlib==3.8.0
 mccabe==0.6.1
 more-itertools==7.2.0
 # -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue
--e git+https://github.com/ComPlat/nmrglue.git@68388863ed3d7a6def7837155432ceb2134dae92#egg=nmrglue
-netCDF4==1.5.3
-numpy==1.21.5
+-e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue
+netCDF4==1.6.4
+numpy==1.26.0
 olefile==0.46
-pandas==0.23.4
+pandas==2.1.1
 pathlib2==2.3.4
 Pillow==9.3.0
 pluggy==0.12.0
@@ -31,12 +31,12 @@ pycodestyle==2.5.0
 pyflakes==2.1.1
 pymzml==2.5.2
 pyparsing==2.4.2
-pytest==4.0.0
-python-dateutil==2.7.3
-pytz==2019.1
+pytest==7.4.2
+python-dateutil==2.8.2
+pytz==2023.3.post1
 regex==2019.4.9
 requests==2.25.0
-scipy==1.2.0
+scipy==1.10.0
 six==1.11.0
 urllib3==1.26.5
 Werkzeug==2.2.3
diff --git a/tests/lib/converter/jcamp/test_data_parse.py b/tests/lib/converter/jcamp/test_data_parse.py
new file mode 100644
index 00000000..4d8e8216
--- /dev/null
+++ b/tests/lib/converter/jcamp/test_data_parse.py
@@ -0,0 +1,26 @@
+import pytest
+from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data
+
+@pytest.fixture
+def parsed_data_with_old_dic_structure():
+    return {'DATATYPE': ['NMR']}, []
+
+@pytest.fixture
+def parsed_data_with_new_dic_structure():
+    return {
+      '_datatype_':[
+        {'DATATYPE': ['NMR']}
+      ]
+    }, []
+
+def test_read_parsed_jdx_data_with_old_dic(parsed_data_with_old_dic_structure):
+    dic, data = read_parsed_jdx_data(parsed_data_with_old_dic_structure)
+    assert isinstance(dic, dict)
+    assert dic['DATATYPE'] == ['NMR']
+    assert isinstance(data, list)
+
+def test_read_parsed_jdx_data_with_new_dic(parsed_data_with_new_dic_structure):
+    dic, data = read_parsed_jdx_data(parsed_data_with_new_dic_structure)
+    assert isinstance(dic, dict)
+    assert dic['DATATYPE'] == ['NMR']
+    assert isinstance(data, list)
diff --git a/tests/test_bagit.py b/tests/test_bagit.py
index 26687f57..cd274ed2 100644
--- a/tests/test_bagit.py
+++ b/tests/test_bagit.py
@@ -4,7 +4,6 @@
 from chem_spectra.controller.helper.file_container import FileContainer
 from chem_spectra.lib.shared.buffer import store_str_in_tmp, store_byte_in_tmp
 
-from chem_spectra.lib.converter.jcamp.base import JcampBaseConverter
 from chem_spectra.lib.converter.bagit.base import BagItBaseConverter
 
 target_dir = './tests/fixtures/'