From d008aa78601376a38b116fbdf36e8e90b55dd539 Mon Sep 17 00:00:00 2001 From: Lan Le Date: Wed, 11 Oct 2023 13:08:44 +0200 Subject: [PATCH] refactor: update to read parsed jdx file --- .github/workflows/unit_test.yml | 4 +- INSTALL.md | 4 +- chem_spectra/lib/converter/jcamp/base.py | 5 +- .../lib/converter/jcamp/data_parse.py | 21 ++- environment.yml | 146 ++++++++++-------- requirements.txt | 18 +-- tests/lib/converter/jcamp/test_data_parse.py | 26 ++++ tests/test_bagit.py | 1 - 8 files changed, 145 insertions(+), 80 deletions(-) create mode 100644 tests/lib/converter/jcamp/test_data_parse.py diff --git a/.github/workflows/unit_test.yml b/.github/workflows/unit_test.yml index c17134f5..53b24486 100644 --- a/.github/workflows/unit_test.yml +++ b/.github/workflows/unit_test.yml @@ -8,10 +8,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.7 + - name: Set up Python 3.9 uses: actions/setup-python@v4 with: - python-version: 3.7 + python-version: 3.9 - name: Install dependencies run: | $CONDA/bin/conda env update --file environment.yml --name base diff --git a/INSTALL.md b/INSTALL.md index d24b62b3..b8e61f3a 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -17,12 +17,12 @@ However, it is highly recommended to refer to official websites. _Logout & login to load installations._ ``` -$ conda create --name chem-spectra python=3.7 +$ conda create --name chem-spectra python=3.9 $ source activate chem-spectra ``` ``` -$ conda install -c rdkit rdkit=2020.09.1.0 +$ conda install -c conda-forge rdkit ``` ``` diff --git a/chem_spectra/lib/converter/jcamp/base.py b/chem_spectra/lib/converter/jcamp/base.py index d6351176..403498cf 100644 --- a/chem_spectra/lib/converter/jcamp/base.py +++ b/chem_spectra/lib/converter/jcamp/base.py @@ -2,6 +2,7 @@ import json from chem_spectra.lib.converter.share import parse_params, parse_solvent +from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data import os data_type_json = os.path.join(os.path.dirname(__file__), 'data_type.json') @@ -43,7 +44,9 @@ def __init__(self, path, params=False): self.__read_solvent() def __read(self, path): - return ng.jcampdx.read(path, show_all_data=True, read_err='ignore') + parsed_data = ng.jcampdx.read(path, show_all_data=True, read_err='ignore') + return_dic, return_data = read_parsed_jdx_data(parsed_data) + return return_dic, return_data def __set_datatype(self): dts = self.datatypes diff --git a/chem_spectra/lib/converter/jcamp/data_parse.py b/chem_spectra/lib/converter/jcamp/data_parse.py index c6ef52d7..84bee5e0 100644 --- a/chem_spectra/lib/converter/jcamp/data_parse.py +++ b/chem_spectra/lib/converter/jcamp/data_parse.py @@ -51,8 +51,6 @@ def make_ni_data_xs(base): return None - - def make_ni_data_xs(base): if base.data_format and (base.data_format == '(XY..XY)'): @@ -67,11 +65,26 @@ def make_ni_data_xs(base): def make_ms_data_xsys(base): - if base.data is None and base.dic['XYPOINTS']: - base.data = [__parse_xy_points(base)] + if base.data is None: + if 'XYPOINTS' in base.dic and base.dic['XYPOINTS']: + base.data = [__parse_xy_points(base)] + elif 'DATATABLE' in base.dic and base.dic['DATATABLE']: + base.data_format = '(XY..XY)' + base.data = [__parse_xy_points(base)] # base.data type is dict if isinstance(base.data, dict): return base.data['real'] return base.data + +def read_parsed_jdx_data(parsed_data): + return_dic = {} + dic, data = parsed_data + for key in dic.keys(): + if '_datatype_' in key: + return_dic = dic[key][0] + break + else: + return_dic[key] = dic[key] + return return_dic, data \ No newline at end of file diff --git a/environment.yml b/environment.yml index 375bdf54..d6a04326 100644 --- a/environment.yml +++ b/environment.yml @@ -1,110 +1,134 @@ name: chem-spectra-dev channels: - - rdkit + - conda-forge - defaults dependencies: - - _libgcc_mutex=0.1=main - - _openmp_mutex=5.1=1_gnu - - blas=1.0=mkl - - bottleneck=1.3.5=py37h7deecbd_0 - - bzip2=1.0.8=h7b6447c_0 - - ca-certificates=2023.01.10=h06a4308_0 - - cairo=1.16.0=hb05425b_4 - - certifi=2022.12.7=py37h06a4308_0 - - expat=2.4.9=h6a678d5_0 - - fontconfig=2.14.1=h4c34cd2_2 - - freetype=2.12.1=h4a9f257_0 - - giflib=5.2.1=h5eee18b_3 + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_kmp_llvm + - blas=2.119=openblas + - blas-devel=3.9.0=19_linux64_openblas + - boost=1.74.0=py39h5472131_5 + - boost-cpp=1.74.0=h75c5d50_8 + - bottleneck=1.3.7=py39h389d5f1_0 + - bzip2=1.0.8=h7f98852_4 + - ca-certificates=2023.7.22=hbcca054_0 + - cairo=1.16.0=hb05425b_5 + - expat=2.5.0=hcb278e6_1 + - fontconfig=2.14.1=h52c9d5c_1 + - freetype=2.10.4=hca18f0e_2 + - giflib=5.2.1=h0b41bf4_3 - glib=2.69.1=he621ea3_2 - - icu=58.2=he6710b0_3 - - intel-openmp=2021.4.0=h06a4308_3561 - - jpeg=9e=h5eee18b_1 - - lcms2=2.12=h3be6417_0 + - greenlet=2.0.1=py39h6a678d5_0 + - icu=70.1=h27087fc_0 + - jpeg=9e=h0b41bf4_3 + - lcms2=2.15=hfd0df8a_0 - ld_impl_linux-64=2.38=h1181459_1 - lerc=3.0=h295c915_0 - - libboost=1.73.0=h28710b8_12 - - libdeflate=1.17=h5eee18b_0 - - libffi=3.4.2=h6a678d5_6 - - libgcc-ng=11.2.0=h1234567_1 - - libgomp=11.2.0=h1234567_1 - - libpng=1.6.39=h5eee18b_0 + - libblas=3.9.0=19_linux64_openblas + - libcblas=3.9.0=19_linux64_openblas + - libdeflate=1.17=h5eee18b_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=13.2.0=h807b86a_2 + - libgfortran-ng=13.2.0=h69a702a_2 + - libgfortran5=13.2.0=ha4646dd_2 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=19_linux64_openblas + - liblapacke=3.9.0=19_linux64_openblas + - libopenblas=0.3.24=pthreads_h413a1c8_0 + - libpng=1.6.39=h753d276_0 - libstdcxx-ng=11.2.0=h1234567_1 - - libtiff=4.5.0=h6a678d5_2 + - libtiff=4.5.1=h6a678d5_0 - libuuid=1.41.5=h5eee18b_0 - - libwebp=1.2.4=h11a3e52_1 - - libwebp-base=1.2.4=h5eee18b_1 - - libxcb=1.15=h7f8727e_0 - - libxml2=2.10.3=hcbfbd50_0 + - libwebp=1.3.2=h11a3e52_0 + - libwebp-base=1.3.2=hd590300_0 + - libxcb=1.16=hd590300_0 + - libxml2=2.9.14=h22db469_4 + - libzlib=1.2.13=hd590300_5 + - llvm-openmp=17.0.3=h4dfa4b3_0 - lz4-c=1.9.4=h6a678d5_0 - - mkl=2021.4.0=h06a4308_640 - - mkl-service=2.4.0=py37h7f8727e_0 - - mkl_fft=1.3.1=py37hd3c417c_0 - - mkl_random=1.2.2=py37h51133e4_0 + - matplotlib-base=3.4.3=py39h2fa2bec_2 - ncurses=6.4=h6a678d5_0 - - numexpr=2.8.4=py37he184ba9_0 - - numpy=1.21.5=py37h6c91a56_3 - - numpy-base=1.21.5=py37ha15fc14_3 - - openssl=1.1.1t=h7f8727e_0 - - packaging=22.0=py37h06a4308_0 - - pcre=8.45=h295c915_0 - - pip=22.3.1=py37h06a4308_0 + - numexpr=2.8.7=py39h286c3b5_0 + - openblas=0.3.24=pthreads_h7a3da1a_0 + - openssl=3.1.4=hd590300_0 + - pcre=8.45=h9c3ff4c_0 + - pip=23.2.1=py39h06a4308_0 - pixman=0.40.0=h7f8727e_1 - - py-boost=1.73.0=py37h51133e4_12 - - python=3.7.16=h7a1cb2a_0 - - rdkit=2020.09.1.0=py37hd50e099_1 + - pthread-stubs=0.4=h36c2ea0_1001 + - pycairo=1.23.0=py39hd1222b9_0 + - python=3.9.18=h955ad1f_0 + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python_abi=3.9=2_cp39 + - pytz=2023.3.post1=pyhd8ed1ab_0 + - rdkit=2022.03.2=py39h89e00b9_0 - readline=8.2=h5eee18b_0 - - setuptools=65.6.3=py37h06a4308_0 - - sqlite=3.41.1=h5eee18b_0 + - reportlab=3.6.12=py39h5eee18b_0 + - setuptools=68.0.0=py39h06a4308_0 + - sqlalchemy=2.0.22=py39hd1e30aa_0 + - sqlite=3.41.2=h5eee18b_0 - tk=8.6.12=h1ccaba5_0 - - wheel=0.38.4=py37h06a4308_0 - - xz=5.2.10=h5eee18b_1 - - zlib=1.2.13=h5eee18b_0 - - zstd=1.5.2=ha4553b6_0 + - tornado=6.3.3=py39hd1e30aa_1 + - typing-extensions=4.8.0=hd8ed1ab_0 + - typing_extensions=4.8.0=pyha770c72_0 + - wheel=0.41.2=py39h06a4308_0 + - xorg-libxau=1.0.11=hd590300_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=hd590300_5 + - zstd=1.5.5=hc292b87_0 - pip: - atomicwrites==1.3.0 - attrs==19.1.0 - - cftime==1.6.2 + - certifi==2022.12.7 + - cftime==1.6.3 - chardet==3.0.4 - click==8.0.0 + - contourpy==1.1.1 - coverage==4.5.3 - cycler==0.10.0 - - datetime==5.1 - entrypoints==0.3 + - exceptiongroup==1.1.3 - flake8==3.7.7 - flask==2.2.5 + - flask-jwt-extended==4.5.2 + - fonttools==4.43.1 - gunicorn==19.9.0 - idna==2.7 - importlib-metadata==3.6.0 + - importlib-resources==5.1.3 + - iniconfig==2.0.0 - itsdangerous==2.0.0 - jinja2==3.1.2 - - kiwisolver==1.4.4 + - kiwisolver==1.4.5 - markupsafe==2.1.2 - - matplotlib==3.0.0 + - matplotlib==3.8.0 - mccabe==0.6.1 - more-itertools==7.2.0 - - netcdf4==1.5.3 + - netcdf4==1.6.4 + - numpy==1.26.0 - olefile==0.46 - - pandas==0.23.4 + - packaging==23.2 + - pandas==2.1.1 - pathlib2==2.3.4 - pillow==9.3.0 - pluggy==0.12.0 - py==1.11.0 - pycodestyle==2.5.0 - pyflakes==2.1.1 + - pyjwt==2.8.0 - pymzml==2.5.2 - pyopenms==2.6.0 - pyparsing==2.4.2 - - pytest==4.0.0 - - python-dateutil==2.7.3 - - pytz==2019.1 + - pytest==7.4.2 - regex==2019.4.9 - requests==2.25.0 - - scipy==1.2.0 + - scipy==1.10.0 - six==1.11.0 - - typing-extensions==4.5.0 + - tomli==2.0.1 + - tzdata==2023.3 - urllib3==1.26.5 - werkzeug==2.2.3 - zipp==0.5.2 - - zope-interface==6.0 - - -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue + - -e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue prefix: /home/eln/anaconda3/envs/chem-spectra-dev diff --git a/requirements.txt b/requirements.txt index 91f3c315..e8a748fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,15 +14,15 @@ importlib-metadata==3.6.0 itsdangerous==2.0 Jinja2==3.1.2 MarkupSafe==2.1.2 -matplotlib==3.0.0 +matplotlib==3.8.0 mccabe==0.6.1 more-itertools==7.2.0 # -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue --e git+https://github.com/ComPlat/nmrglue.git@68388863ed3d7a6def7837155432ceb2134dae92#egg=nmrglue -netCDF4==1.5.3 -numpy==1.21.5 +-e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue +netCDF4==1.6.4 +numpy==1.26.0 olefile==0.46 -pandas==0.23.4 +pandas==2.1.1 pathlib2==2.3.4 Pillow==9.3.0 pluggy==0.12.0 @@ -31,12 +31,12 @@ pycodestyle==2.5.0 pyflakes==2.1.1 pymzml==2.5.2 pyparsing==2.4.2 -pytest==4.0.0 -python-dateutil==2.7.3 -pytz==2019.1 +pytest==7.4.2 +python-dateutil==2.8.2 +pytz==2023.3.post1 regex==2019.4.9 requests==2.25.0 -scipy==1.2.0 +scipy==1.10.0 six==1.11.0 urllib3==1.26.5 Werkzeug==2.2.3 diff --git a/tests/lib/converter/jcamp/test_data_parse.py b/tests/lib/converter/jcamp/test_data_parse.py new file mode 100644 index 00000000..4d8e8216 --- /dev/null +++ b/tests/lib/converter/jcamp/test_data_parse.py @@ -0,0 +1,26 @@ +import pytest +from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data + +@pytest.fixture +def parsed_data_with_old_dic_structure(): + return {'DATATYPE': ['NMR']}, [] + +@pytest.fixture +def parsed_data_with_new_dic_structure(): + return { + '_datatype_':[ + {'DATATYPE': ['NMR']} + ] + }, [] + +def test_read_parsed_jdx_data_with_old_dic(parsed_data_with_old_dic_structure): + dic, data = read_parsed_jdx_data(parsed_data_with_old_dic_structure) + assert isinstance(dic, dict) + assert dic['DATATYPE'] == ['NMR'] + assert isinstance(data, list) + +def test_read_parsed_jdx_data_with_new_dic(parsed_data_with_new_dic_structure): + dic, data = read_parsed_jdx_data(parsed_data_with_new_dic_structure) + assert isinstance(dic, dict) + assert dic['DATATYPE'] == ['NMR'] + assert isinstance(data, list) diff --git a/tests/test_bagit.py b/tests/test_bagit.py index 26687f57..cd274ed2 100644 --- a/tests/test_bagit.py +++ b/tests/test_bagit.py @@ -4,7 +4,6 @@ from chem_spectra.controller.helper.file_container import FileContainer from chem_spectra.lib.shared.buffer import store_str_in_tmp, store_byte_in_tmp -from chem_spectra.lib.converter.jcamp.base import JcampBaseConverter from chem_spectra.lib.converter.bagit.base import BagItBaseConverter target_dir = './tests/fixtures/'