Skip to content

Commit

Permalink
Merge pull request okfn#136 from pudo/allow-parser-args
Browse files (browse the repository at this point in the history)
Pass any_tableset arguments through to specific parsers.
  • Loading branch information
drj11 committed Jul 31, 2015
2 parents 7a0d30e + e2ab7de commit 33f6d7e
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 16 deletions.
8 changes: 4 additions & 4 deletions messytables/any.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def guess_ext(ext):
return lookup.get(ext, None)


def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True):
def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True, **kw):
"""Reads any supported table type according to a specified
MIME type or file extension or automatically detecting the
type.
Expand All @@ -133,7 +133,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True):
if mimetype is not None:
attempt = guess_mime(mimetype)
if attempt:
return parsers[attempt](fileobj)
return parsers[attempt](fileobj, **kw)
else:
error.append(
'Did not recognise MIME type given: "{mimetype}".'.format(
Expand All @@ -142,7 +142,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True):
if short_ext is not '':
attempt = guess_ext(short_ext)
if attempt:
return parsers[attempt](fileobj)
return parsers[attempt](fileobj, **kw)
else:
error.append(
'Did not recognise extension "{ext}" (given "{full})".'.format(
Expand All @@ -152,7 +152,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True):
magic_mime = get_mime(fileobj)
attempt = guess_mime(magic_mime)
if attempt:
return parsers[attempt](fileobj)
return parsers[attempt](fileobj, **kw)
else:
error.append(
'Did not recognise detected MIME type: "{mimetype}".'.format(
Expand Down
6 changes: 4 additions & 2 deletions messytables/commas.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __next__(self):

next = __next__


def to_unicode_or_bust(obj, encoding='utf-8'):
if isinstance(obj, byte_string):
obj = unicode_string(obj, encoding)
Expand All @@ -78,7 +79,7 @@ class CSVTableSet(TableSet):

def __init__(self, fileobj, delimiter=None, quotechar=None, name=None,
encoding=None, window=None, doublequote=None,
lineterminator=None, skipinitialspace=None):
lineterminator=None, skipinitialspace=None, **kw):
self.fileobj = messytables.seekable_stream(fileobj)
self.name = name or 'table'
self.delimiter = delimiter
Expand Down Expand Up @@ -113,6 +114,7 @@ def __init__(self, name, fileobj, delimiter=None, quotechar=None,
self.name = name
seekable_fileobj = messytables.seekable_stream(fileobj)
self.fileobj = UTF8Recoder(seekable_fileobj, encoding)

def fake_ilines(fobj):
for row in fobj:
yield row.decode('utf-8')
Expand Down Expand Up @@ -181,7 +183,7 @@ def rows():

try:
for row in csv.reader(rows(),
dialect=self._dialect, **self._overrides):
dialect=self._dialect, **self._overrides):
yield [Cell(to_unicode_or_bust(c)) for c in row]
except csv.Error as err:
if u'newline inside string' in unicode_string(err) and sample:
Expand Down
5 changes: 3 additions & 2 deletions messytables/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@ class InvalidDateError(Exception):
4: IntegerType()
}


class XLSTableSet(TableSet):
"""An excel workbook wrapper object.
"""

def __init__(self, fileobj=None, filename=None,
window=None, encoding=None, with_formatting_info=True):
def __init__(self, fileobj=None, filename=None, window=None,
encoding=None, with_formatting_info=True, **kw):
'''Initialize the tableset.
:param encoding: passed on to xlrd.open_workbook function
Expand Down
4 changes: 3 additions & 1 deletion messytables/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
import html5lib
import xml.etree.ElementTree as etree


def fromstring(s):
tb = html5lib.getTreeBuilder("lxml", implementation=etree)
p = html5lib.HTMLParser(tb, namespaceHTMLElements=False)
return p.parse(s)


class HTMLTableSet(TableSet):
"""
A TableSet from a HTML document.
"""
def __init__(self, fileobj=None, filename=None, window=None):
def __init__(self, fileobj=None, filename=None, window=None, **kw):

if filename is not None:
fh = open(filename, 'r')
Expand Down
3 changes: 2 additions & 1 deletion messytables/ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
'date': DateType(None),
}


class ODSTableSet(TableSet):
"""
A wrapper around ODS files. Because they are zipped and the info we want
Expand All @@ -27,7 +28,7 @@ class ODSTableSet(TableSet):
the remote URL.
"""

def __init__(self, fileobj, window=None):
def __init__(self, fileobj, window=None, **kw):
'''Initialize the object.
:param fileobj: may be a file path or a file-like object. Note the
Expand Down
2 changes: 1 addition & 1 deletion messytables/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class PDFTableSet(TableSet):
"""
A TableSet from a PDF document.
"""
def __init__(self, fileobj=None, filename=None):
def __init__(self, fileobj=None, filename=None, **kw):
if get_tables is None:
raise ImportError("pdftables is not installed")
if filename is not None:
Expand Down
2 changes: 1 addition & 1 deletion messytables/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def cast(self, value):
if value.is_integer():
return int(value)
else:
raise ValueError()
raise ValueError('Invalid integer: %s' % value)


class DecimalType(CellType):
Expand Down
8 changes: 4 additions & 4 deletions messytables/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class ZIPTableSet(messytables.TableSet):
""" Reads TableSets from inside a ZIP file """

def __init__(self, fileobj):
def __init__(self, fileobj, **kw):
"""
On error it will raise messytables.ReadError.
"""
Expand All @@ -17,7 +17,7 @@ def __init__(self, fileobj):
for f in z.infolist():
ext = None

#ignore metadata folders added by Mac OS X
# ignore metadata folders added by Mac OS X
if '__MACOSX' in f.filename:
continue

Expand All @@ -26,7 +26,7 @@ def __init__(self, fileobj):

try:
filetables = messytables.any.any_tableset(
z.open(f), extension=ext)
z.open(f), extension=ext, **kw)
except ValueError as e:
found.append(f.filename + ": " + e.message)
continue
Expand All @@ -38,5 +38,5 @@ def __init__(self, fileobj):
tables (%s).''' % ', '.join(found))
finally:
z.close()

self._tables = tables

0 comments on commit 33f6d7e

Please sign in to comment.