You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Bug report
Running iterHdf5ToDict with a non-None groupname parameter will result in a runtime error when it executes the getGroupInputDataLength(f) line. This is because f is defined to be the hdf5 group, not the file.
In arrayUtils.py line 88, we call hg.keys() which is only valid for the hdf5 file.
The following code reproduces the runtime error. To run this you'll need an environment with h5py and tables_io installed.
import h5py
import json
from tables_io.ioUtils import iterHdf5ToDict

GROUPNAME = 'example_group'

# Build an example HDF5 file whose single dataset holds variable-length
# JSON strings.
with h5py.File('bug_example.hdf5', 'w') as file:
    dicts = [
        {'a': 0, 'b': [[8, 3]]},
        {'a': 1, 'b': [[1, 2, 3]]},
        {'a': 2, 'b': [[1, 2, 3, 4, 5], [7, 8, 9]]},
    ]
    # Serialize each dictionary to a JSON string.
    data = [json.dumps(d) for d in dicts]
    dt = h5py.special_dtype(vlen=str)
    # Store the JSON strings under GROUPNAME.
    dataset = file.create_dataset(GROUPNAME, data=data, dtype=dt)

SHOW_BUG = True

if SHOW_BUG:
    # Iterating with a non-None groupname triggers the reported bug.
    buggy_iter = iterHdf5ToDict(
        "bug_example.hdf5",
        chunk_size=1,
        groupname=GROUPNAME,
        rank=0,
        parallel_size=1)
    # Read each chunk back, decode the JSON lines, and print them.
    for start, end, data in buggy_iter:
        dicts = [json.loads(line) for line in data]
        print(f"Start, end: {start, end}")
        print(dicts)
else:
    # Iterating with groupname=None works as expected.
    good_iter = iterHdf5ToDict(
        "bug_example.hdf5",
        chunk_size=1,
        groupname=None,
        rank=0,
        parallel_size=1)
    # Read each chunk back, decode the JSON lines, and print them.
    for start, end, data in good_iter:
        dicts = [json.loads(line) for line in data[GROUPNAME]]
        print(f"Start, end: {start, end}")
        print(dicts)
Produces the following error:
Traceback (most recent call last):
File "/home/drew/code/hdf5-test/bug_example.py", line 35, in <module>
for start, end, data in buggy_iter:
File "/home/drew/miniconda3/envs/hdf5/lib/python3.9/site-packages/tables_io/ioUtils.py", line 379, in iterHdf5ToDict
num_rows = getGroupInputDataLength(f)
File "/home/drew/miniconda3/envs/hdf5/lib/python3.9/site-packages/tables_io/arrayUtils.py", line 88, in getGroupInputDataLength
firstkey = list(hg.keys())[0]
AttributeError: 'Dataset' object has no attribute 'keys'
I believe this can be addressed with the following code in arrayUtils.py
def getGroupInputDataLength(hg):
    """Return the number of rows of data behind an HDF5 handle.

    Parameters
    ----------
    hg : h5py.File, h5py.Group, or h5py.Dataset
        The HDF5 object whose data length is wanted.

    Returns
    -------
    int
        The number of rows.

    Raises
    ------
    TypeError
        If ``hg`` is not one of the supported h5py types.

    Notes
    -----
    ``h5py.File`` subclasses ``h5py.Group``, so the ``File`` check must
    come first.  The original proposal handled only ``File`` and
    ``Group`` and silently returned ``None`` for an ``h5py.Dataset`` —
    the exact object type produced by ``file[groupname]`` in the
    reported traceback — so a ``Dataset`` branch is required.
    """
    if isinstance(hg, h5py.File):
        return _getHdf5FileLength(hg)
    if isinstance(hg, h5py.Group):
        return _getHdf5GroupLength(hg)
    if isinstance(hg, h5py.Dataset):
        # A single dataset: its length is its number of rows.
        return len(hg)
    raise TypeError(
        f"getGroupInputDataLength expects an h5py File, Group, or Dataset, "
        f"got {type(hg)}")
def _getHdf5FileLength(hg):
firstkey = list(hg.keys())[0]
nrows = len(hg[firstkey])
firstname = hg[firstkey].name
for value in hg.values():
if len(value) != nrows:
raise ValueError(
f"Group does not represent a table. Length ({len(value)})"
f"of column {value.name} not not match length ({nrows}) of"
f"first column {firstname}"
)
return nrows
def _getHdf5GroupLength(hg):
return len(hg)
The text was updated successfully, but these errors were encountered:
Bug report
Running
iterHdf5ToDict
with a non-None groupname parameter will result in a runtime error when it executes the getGroupInputDataLength(f)
line. This is because f is defined to be the hdf5 group, not the file. In arrayUtils.py line 88, we call
hg.keys()
which is only valid for the hdf5 file. The following code reproduces the runtime error. To run this you'll need an environment with
h5py
and tables_io
installed. Produces the following error:
I believe this can be addressed with the following code in arrayUtils.py
The text was updated successfully, but these errors were encountered: