Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebase from upstream and add stable version #2

Merged
merged 8 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
python-version: [3.7, 3.8]

steps:
- uses: actions/checkout@v2
Expand Down
97 changes: 78 additions & 19 deletions nmrglue/fileio/jcampdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _getkey(keystr):
.replace("-", "").replace("_", "").replace("/", ""))


def _readrawdic(filename):
def _readrawdic(filename, read_err=None):
'''
Reads JCAMP-DX file to dictionary, from which actual
data is parsed later. Dictionary contains each data
Expand All @@ -46,7 +46,7 @@ def _readrawdic(filename):

diclist = [] # for separating multiple data sections (multiple ##END tags)
dic = {"_comments": []} # create empty dictionary
filein = open(filename, 'r')
filein = open(filename, 'r', errors=read_err)

currentkey = None
currentvaluestrings = []
Expand Down Expand Up @@ -199,6 +199,8 @@ def _detect_format(dataline):
firstvalue_re = re.compile(
r"(\s)*([+-]?\d+\.?\d*|[+-]?\.\d+)([eE][+-]?\d+)?(\s)*")

xy_re = re.compile('^[0-9\.]+, [0-9\.]+')

index = firstvalue_re.match(dataline).end()
if index is None:
return -1
Expand All @@ -213,6 +215,10 @@ def _detect_format(dataline):
return 1
if firstchar in _DUP_DIGITS:
return 1

if re.search(xy_re, dataline):
return 2

return 0


Expand Down Expand Up @@ -395,10 +401,33 @@ def _parse_pseudo(datalines):
return data


def _parse_xy_xy(datalines):
pts = []
len_group_data = 0
for dataline in datalines:
if not dataline:
continue
xy_re = re.compile('[^ ][0-9\.]+, [0-9\.]+')
group_data = re.findall(xy_re, dataline)
len_group_data = len(group_data)
for data in group_data:
x, y = data.split(', ')
pts.append([float(x), float(y)])

if len_group_data > 1:
return [pts]
else:
return pts


def _parse_data(datastring):
'''
Creates numpy array from datalines
'''
probe_data = datastring[80:320]
if ',' in probe_data and not('.' in probe_data): # fix comma as decimal points
datastring = datastring.replace(',', '.')

datalines = datastring.split("\n")
headerline = datalines[0]

Expand All @@ -412,6 +441,8 @@ def _parse_data(datastring):
data = _parse_pseudo(datalines)
elif mode == 0:
data = _parse_affn_pac(datalines)
elif mode == 2:
data = _parse_xy_xy(datalines)
else:
return None
if data is None:
Expand Down Expand Up @@ -461,7 +492,7 @@ def find_yfactors(dic):
return (factor_r, factor_i)


def _getdataarray(dic):
def _getdataarray(dic, show_all_data=False):
'''
Main function for data array parsing, input is the
raw dictionary from _readrawdic
Expand Down Expand Up @@ -491,19 +522,23 @@ def _getdataarray(dic):
idatalist.append(data)
else:
rdatalist.append(data)
if len(rdatalist) > 1:
warn("NTUPLES: multiple real arrays, returning first one only")
if len(idatalist) > 1:
warn("NTUPLES: multiple imaginary arrays, \
returning first one only")
if rdatalist:
if idatalist:
data = [rdatalist[0], idatalist[0]]
else:
data = rdatalist[0]

if show_all_data:
data = { 'real': rdatalist, 'imaginary': idatalist }
else:
if idatalist:
data = [None, idatalist[0]]
if len(rdatalist) > 1:
warn("NTUPLES: multiple real arrays, returning first one only")
if len(idatalist) > 1:
warn("NTUPLES: multiple imaginary arrays, \
returning first one only")
if rdatalist:
if idatalist:
data = [rdatalist[0], idatalist[0]]
else:
data = rdatalist[0]
else:
if idatalist:
data = [None, idatalist[0]]

if data is None: # XYDATA
try:
Expand All @@ -518,14 +553,27 @@ def _getdataarray(dic):
except KeyError:
warn("XYDATA not found ")

if data is None:
return None
if data is None: # PEAK TABLE
try:
valuelist = dic["PEAKTABLE"]
if len(valuelist) == 1:
data, datatype = _parse_data(valuelist[0])
else:
warn("Multiple PEAKTABLE arrays in JCAMP-DX file, \
returning first one only")
except KeyError:
warn("PEAKTABLE not found ")

# apply YFACTOR to data if available
if is_ntuples:
yfactor_r, yfactor_i = find_yfactors(dic)
if yfactor_r is None or yfactor_r is None:
warn("NTUPLES: YFACTORs not applied, parsing failed")
elif show_all_data:
for i, _ in enumerate(data['real']):
data['real'][i] = data['real'][i] * yfactor_r
for i, _ in enumerate(data['imaginary']):
data['imaginary'][i] = data['imaginary'][i] * yfactor_i
else:
data[0] = data[0] * yfactor_r
data[1] = data[1] * yfactor_i
Expand All @@ -541,7 +589,7 @@ def _getdataarray(dic):
return data


def read(filename):
def read(filename, show_all_data=False, read_err=None):
"""
Read JCAMP-DX file

Expand All @@ -567,13 +615,24 @@ def read(filename):
# first read everything (including data array) to "raw" dictionary,
# in which data values are read as raw strings including whitespace
# and newlines
dic = _readrawdic(filename)
dic = _readrawdic(filename, read_err)

# select the relevant data section.
# first try to parse NMRSPECTRUM sections in order,
# and go with first that has proper data:
data = None
correctdic = None

# find and parse NMR data array from raw dic
data = _getdataarray(dic, show_all_data)

# remove data tables from dic
try:
dic['XYDATA_OLD'] = dic["XYDATA"]
del dic["XYDATA"]
except KeyError:
pass

try:
subdiclist = dic["_datatype_NMRSPECTRUM"]
for subdic in subdiclist:
Expand Down
Loading