Skip to content

Commit

Permalink
refactor: update to read parsed jdx file
Browse files Browse the repository at this point in the history
  • Loading branch information
Lan Le committed Oct 25, 2023
1 parent 4e4b02d commit d008aa7
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 80 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ jobs:

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.7
- name: Set up Python 3.9
uses: actions/setup-python@v4
with:
python-version: 3.7
python-version: 3.9
- name: Install dependencies
run: |
$CONDA/bin/conda env update --file environment.yml --name base
Expand Down
4 changes: 2 additions & 2 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ However, it is highly recommended to refer to official websites.
_Logout & login to load installations._

```
$ conda create --name chem-spectra python=3.7
$ conda create --name chem-spectra python=3.9
$ source activate chem-spectra
```

```
$ conda install -c rdkit rdkit=2020.09.1.0
$ conda install -c conda-forge rdkit
```

```
Expand Down
5 changes: 4 additions & 1 deletion chem_spectra/lib/converter/jcamp/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json

from chem_spectra.lib.converter.share import parse_params, parse_solvent
from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data
import os

data_type_json = os.path.join(os.path.dirname(__file__), 'data_type.json')
Expand Down Expand Up @@ -43,7 +44,9 @@ def __init__(self, path, params=False):
self.__read_solvent()

def __read(self, path):
return ng.jcampdx.read(path, show_all_data=True, read_err='ignore')
parsed_data = ng.jcampdx.read(path, show_all_data=True, read_err='ignore')
return_dic, return_data = read_parsed_jdx_data(parsed_data)
return return_dic, return_data

def __set_datatype(self):
dts = self.datatypes
Expand Down
21 changes: 17 additions & 4 deletions chem_spectra/lib/converter/jcamp/data_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ def make_ni_data_xs(base):

return None




def make_ni_data_xs(base):
if base.data_format and (base.data_format == '(XY..XY)'):
Expand All @@ -67,11 +65,26 @@ def make_ni_data_xs(base):


def make_ms_data_xsys(base):
if base.data is None and base.dic['XYPOINTS']:
base.data = [__parse_xy_points(base)]
if base.data is None:
if 'XYPOINTS' in base.dic and base.dic['XYPOINTS']:
base.data = [__parse_xy_points(base)]
elif 'DATATABLE' in base.dic and base.dic['DATATABLE']:
base.data_format = '(XY..XY)'
base.data = [__parse_xy_points(base)]

# base.data may be a dict; if so, return its 'real' entry
if isinstance(base.data, dict):
return base.data['real']

return base.data

def read_parsed_jdx_data(parsed_data):
    """Normalise the ``(dic, data)`` pair handed over by the JCAMP-DX reader.

    Two header layouts are accepted:

    * flat, e.g. ``{'DATATYPE': ['NMR']}``: every entry is copied into a
      new dict, so the caller never receives the input dict itself;
    * nested: a key containing ``'_datatype_'`` maps to a list of header
      dicts. The first dict of the first non-empty such list is returned
      and every other top-level key is dropped.

    Args:
        parsed_data: two-item iterable ``(dic, data)``.

    Returns:
        tuple: ``(header_dict, data)``; ``data`` is passed through untouched.
    """
    dic, data = parsed_data
    return_dic = {}
    for key, value in dic.items():
        if '_datatype_' in key:
            # An empty block list carries no header. Skip it instead of
            # letting ``value[0]`` raise IndexError, and keep looking.
            if not value:
                continue
            # Nested layout: the first block replaces anything collected
            # so far, exactly as the flat keys were discarded before.
            return value[0], data
        return_dic[key] = value
    return return_dic, data
146 changes: 85 additions & 61 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,110 +1,134 @@
name: chem-spectra-dev
channels:
- rdkit
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=5.1=1_gnu
- blas=1.0=mkl
- bottleneck=1.3.5=py37h7deecbd_0
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2023.01.10=h06a4308_0
- cairo=1.16.0=hb05425b_4
- certifi=2022.12.7=py37h06a4308_0
- expat=2.4.9=h6a678d5_0
- fontconfig=2.14.1=h4c34cd2_2
- freetype=2.12.1=h4a9f257_0
- giflib=5.2.1=h5eee18b_3
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_kmp_llvm
- blas=2.119=openblas
- blas-devel=3.9.0=19_linux64_openblas
- boost=1.74.0=py39h5472131_5
- boost-cpp=1.74.0=h75c5d50_8
- bottleneck=1.3.7=py39h389d5f1_0
- bzip2=1.0.8=h7f98852_4
- ca-certificates=2023.7.22=hbcca054_0
- cairo=1.16.0=hb05425b_5
- expat=2.5.0=hcb278e6_1
- fontconfig=2.14.1=h52c9d5c_1
- freetype=2.10.4=hca18f0e_2
- giflib=5.2.1=h0b41bf4_3
- glib=2.69.1=he621ea3_2
- icu=58.2=he6710b0_3
- intel-openmp=2021.4.0=h06a4308_3561
- jpeg=9e=h5eee18b_1
- lcms2=2.12=h3be6417_0
- greenlet=2.0.1=py39h6a678d5_0
- icu=70.1=h27087fc_0
- jpeg=9e=h0b41bf4_3
- lcms2=2.15=hfd0df8a_0
- ld_impl_linux-64=2.38=h1181459_1
- lerc=3.0=h295c915_0
- libboost=1.73.0=h28710b8_12
- libdeflate=1.17=h5eee18b_0
- libffi=3.4.2=h6a678d5_6
- libgcc-ng=11.2.0=h1234567_1
- libgomp=11.2.0=h1234567_1
- libpng=1.6.39=h5eee18b_0
- libblas=3.9.0=19_linux64_openblas
- libcblas=3.9.0=19_linux64_openblas
- libdeflate=1.17=h5eee18b_1
- libexpat=2.5.0=hcb278e6_1
- libffi=3.4.4=h6a678d5_0
- libgcc-ng=13.2.0=h807b86a_2
- libgfortran-ng=13.2.0=h69a702a_2
- libgfortran5=13.2.0=ha4646dd_2
- libiconv=1.17=h166bdaf_0
- liblapack=3.9.0=19_linux64_openblas
- liblapacke=3.9.0=19_linux64_openblas
- libopenblas=0.3.24=pthreads_h413a1c8_0
- libpng=1.6.39=h753d276_0
- libstdcxx-ng=11.2.0=h1234567_1
- libtiff=4.5.0=h6a678d5_2
- libtiff=4.5.1=h6a678d5_0
- libuuid=1.41.5=h5eee18b_0
- libwebp=1.2.4=h11a3e52_1
- libwebp-base=1.2.4=h5eee18b_1
- libxcb=1.15=h7f8727e_0
- libxml2=2.10.3=hcbfbd50_0
- libwebp=1.3.2=h11a3e52_0
- libwebp-base=1.3.2=hd590300_0
- libxcb=1.16=hd590300_0
- libxml2=2.9.14=h22db469_4
- libzlib=1.2.13=hd590300_5
- llvm-openmp=17.0.3=h4dfa4b3_0
- lz4-c=1.9.4=h6a678d5_0
- mkl=2021.4.0=h06a4308_640
- mkl-service=2.4.0=py37h7f8727e_0
- mkl_fft=1.3.1=py37hd3c417c_0
- mkl_random=1.2.2=py37h51133e4_0
- matplotlib-base=3.4.3=py39h2fa2bec_2
- ncurses=6.4=h6a678d5_0
- numexpr=2.8.4=py37he184ba9_0
- numpy=1.21.5=py37h6c91a56_3
- numpy-base=1.21.5=py37ha15fc14_3
- openssl=1.1.1t=h7f8727e_0
- packaging=22.0=py37h06a4308_0
- pcre=8.45=h295c915_0
- pip=22.3.1=py37h06a4308_0
- numexpr=2.8.7=py39h286c3b5_0
- openblas=0.3.24=pthreads_h7a3da1a_0
- openssl=3.1.4=hd590300_0
- pcre=8.45=h9c3ff4c_0
- pip=23.2.1=py39h06a4308_0
- pixman=0.40.0=h7f8727e_1
- py-boost=1.73.0=py37h51133e4_12
- python=3.7.16=h7a1cb2a_0
- rdkit=2020.09.1.0=py37hd50e099_1
- pthread-stubs=0.4=h36c2ea0_1001
- pycairo=1.23.0=py39hd1222b9_0
- python=3.9.18=h955ad1f_0
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python_abi=3.9=2_cp39
- pytz=2023.3.post1=pyhd8ed1ab_0
- rdkit=2022.03.2=py39h89e00b9_0
- readline=8.2=h5eee18b_0
- setuptools=65.6.3=py37h06a4308_0
- sqlite=3.41.1=h5eee18b_0
- reportlab=3.6.12=py39h5eee18b_0
- setuptools=68.0.0=py39h06a4308_0
- sqlalchemy=2.0.22=py39hd1e30aa_0
- sqlite=3.41.2=h5eee18b_0
- tk=8.6.12=h1ccaba5_0
- wheel=0.38.4=py37h06a4308_0
- xz=5.2.10=h5eee18b_1
- zlib=1.2.13=h5eee18b_0
- zstd=1.5.2=ha4553b6_0
- tornado=6.3.3=py39hd1e30aa_1
- typing-extensions=4.8.0=hd8ed1ab_0
- typing_extensions=4.8.0=pyha770c72_0
- wheel=0.41.2=py39h06a4308_0
- xorg-libxau=1.0.11=hd590300_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xz=5.4.2=h5eee18b_0
- zlib=1.2.13=hd590300_5
- zstd=1.5.5=hc292b87_0
- pip:
- atomicwrites==1.3.0
- attrs==19.1.0
- cftime==1.6.2
- certifi==2022.12.7
- cftime==1.6.3
- chardet==3.0.4
- click==8.0.0
- contourpy==1.1.1
- coverage==4.5.3
- cycler==0.10.0
- datetime==5.1
- entrypoints==0.3
- exceptiongroup==1.1.3
- flake8==3.7.7
- flask==2.2.5
- flask-jwt-extended==4.5.2
- fonttools==4.43.1
- gunicorn==19.9.0
- idna==2.7
- importlib-metadata==3.6.0
- importlib-resources==5.1.3
- iniconfig==2.0.0
- itsdangerous==2.0.0
- jinja2==3.1.2
- kiwisolver==1.4.4
- kiwisolver==1.4.5
- markupsafe==2.1.2
- matplotlib==3.0.0
- matplotlib==3.8.0
- mccabe==0.6.1
- more-itertools==7.2.0
- netcdf4==1.5.3
- netcdf4==1.6.4
- numpy==1.26.0
- olefile==0.46
- pandas==0.23.4
- packaging==23.2
- pandas==2.1.1
- pathlib2==2.3.4
- pillow==9.3.0
- pluggy==0.12.0
- py==1.11.0
- pycodestyle==2.5.0
- pyflakes==2.1.1
- pyjwt==2.8.0
- pymzml==2.5.2
- pyopenms==2.6.0
- pyparsing==2.4.2
- pytest==4.0.0
- python-dateutil==2.7.3
- pytz==2019.1
- pytest==7.4.2
- regex==2019.4.9
- requests==2.25.0
- scipy==1.2.0
- scipy==1.10.0
- six==1.11.0
- typing-extensions==4.5.0
- tomli==2.0.1
- tzdata==2023.3
- urllib3==1.26.5
- werkzeug==2.2.3
- zipp==0.5.2
- zope-interface==6.0
- -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue
- -e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue
prefix: /home/eln/anaconda3/envs/chem-spectra-dev
18 changes: 9 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ importlib-metadata==3.6.0
itsdangerous==2.0
Jinja2==3.1.2
MarkupSafe==2.1.2
matplotlib==3.0.0
matplotlib==3.8.0
mccabe==0.6.1
more-itertools==7.2.0
# -e git+https://github.com/ComPlat/nmrglue.git@c5a7d4d0073fedff68808b4e9c95836a8c20413e#egg=nmrglue
-e git+https://github.com/ComPlat/nmrglue.git@68388863ed3d7a6def7837155432ceb2134dae92#egg=nmrglue
netCDF4==1.5.3
numpy==1.21.5
-e git+https://github.com/ComPlat/nmrglue.git@development#egg=nmrglue
netCDF4==1.6.4
numpy==1.26.0
olefile==0.46
pandas==0.23.4
pandas==2.1.1
pathlib2==2.3.4
Pillow==9.3.0
pluggy==0.12.0
Expand All @@ -31,12 +31,12 @@ pycodestyle==2.5.0
pyflakes==2.1.1
pymzml==2.5.2
pyparsing==2.4.2
pytest==4.0.0
python-dateutil==2.7.3
pytz==2019.1
pytest==7.4.2
python-dateutil==2.8.2
pytz==2023.3.post1
regex==2019.4.9
requests==2.25.0
scipy==1.2.0
scipy==1.10.0
six==1.11.0
urllib3==1.26.5
Werkzeug==2.2.3
Expand Down
26 changes: 26 additions & 0 deletions tests/lib/converter/jcamp/test_data_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pytest
from chem_spectra.lib.converter.jcamp.data_parse import read_parsed_jdx_data

@pytest.fixture
def parsed_data_with_old_dic_structure():
    """Provide a ``(dic, data)`` pair whose header dict is flat."""
    flat_header = {'DATATYPE': ['NMR']}
    no_points = []
    return flat_header, no_points

@pytest.fixture
def parsed_data_with_new_dic_structure():
    """Provide a ``(dic, data)`` pair whose header sits under ``'_datatype_'``."""
    first_block = {'DATATYPE': ['NMR']}
    nested_header = {'_datatype_': [first_block]}
    no_points = []
    return nested_header, no_points

def test_read_parsed_jdx_data_with_old_dic(parsed_data_with_old_dic_structure):
    """A flat header dict is returned as a dict with its entries intact."""
    result = read_parsed_jdx_data(parsed_data_with_old_dic_structure)
    header, points = result
    assert isinstance(header, dict)
    assert header['DATATYPE'] == ['NMR']
    assert isinstance(points, list)

def test_read_parsed_jdx_data_with_new_dic(parsed_data_with_new_dic_structure):
    """A nested ``'_datatype_'`` header is unwrapped to its first block."""
    result = read_parsed_jdx_data(parsed_data_with_new_dic_structure)
    header, points = result
    assert isinstance(header, dict)
    assert header['DATATYPE'] == ['NMR']
    assert isinstance(points, list)
1 change: 0 additions & 1 deletion tests/test_bagit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from chem_spectra.controller.helper.file_container import FileContainer
from chem_spectra.lib.shared.buffer import store_str_in_tmp, store_byte_in_tmp

from chem_spectra.lib.converter.jcamp.base import JcampBaseConverter
from chem_spectra.lib.converter.bagit.base import BagItBaseConverter

target_dir = './tests/fixtures/'
Expand Down

0 comments on commit d008aa7

Please sign in to comment.