diff --git a/messytables/any.py b/messytables/any.py index 02cf25c..24b2827 100644 --- a/messytables/any.py +++ b/messytables/any.py @@ -110,7 +110,7 @@ def guess_ext(ext): return lookup.get(ext, None) -def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True): +def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True, **kw): """Reads any supported table type according to a specified MIME type or file extension or automatically detecting the type. @@ -133,7 +133,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True): if mimetype is not None: attempt = guess_mime(mimetype) if attempt: - return parsers[attempt](fileobj) + return parsers[attempt](fileobj, **kw) else: error.append( 'Did not recognise MIME type given: "{mimetype}".'.format( @@ -142,7 +142,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True): if short_ext is not '': attempt = guess_ext(short_ext) if attempt: - return parsers[attempt](fileobj) + return parsers[attempt](fileobj, **kw) else: error.append( 'Did not recognise extension "{ext}" (given "{full})".'.format( @@ -152,7 +152,7 @@ def any_tableset(fileobj, mimetype=None, extension='', auto_detect=True): magic_mime = get_mime(fileobj) attempt = guess_mime(magic_mime) if attempt: - return parsers[attempt](fileobj) + return parsers[attempt](fileobj, **kw) else: error.append( 'Did not recognise detected MIME type: "{mimetype}".'.format( diff --git a/messytables/commas.py b/messytables/commas.py index 38d5cc6..65dd999 100644 --- a/messytables/commas.py +++ b/messytables/commas.py @@ -66,6 +66,7 @@ def __next__(self): next = __next__ + def to_unicode_or_bust(obj, encoding='utf-8'): if isinstance(obj, byte_string): obj = unicode_string(obj, encoding) @@ -78,7 +79,7 @@ class CSVTableSet(TableSet): def __init__(self, fileobj, delimiter=None, quotechar=None, name=None, encoding=None, window=None, doublequote=None, - lineterminator=None, skipinitialspace=None): + lineterminator=None, skipinitialspace=None, **kw): self.fileobj = messytables.seekable_stream(fileobj) self.name = name or 'table' self.delimiter = delimiter @@ -113,6 +114,7 @@ def __init__(self, name, fileobj, delimiter=None, quotechar=None, self.name = name seekable_fileobj = messytables.seekable_stream(fileobj) self.fileobj = UTF8Recoder(seekable_fileobj, encoding) + def fake_ilines(fobj): for row in fobj: yield row.decode('utf-8') @@ -181,7 +183,7 @@ def rows(): try: for row in csv.reader(rows(), - dialect=self._dialect, **self._overrides): + dialect=self._dialect, **self._overrides): yield [Cell(to_unicode_or_bust(c)) for c in row] except csv.Error as err: if u'newline inside string' in unicode_string(err) and sample: diff --git a/messytables/excel.py b/messytables/excel.py index 94a3867..9d30131 100644 --- a/messytables/excel.py +++ b/messytables/excel.py @@ -25,12 +25,13 @@ class InvalidDateError(Exception): 4: IntegerType() } + class XLSTableSet(TableSet): """An excel workbook wrapper object. """ - def __init__(self, fileobj=None, filename=None, - window=None, encoding=None, with_formatting_info=True): + def __init__(self, fileobj=None, filename=None, window=None, + encoding=None, with_formatting_info=True, **kw): '''Initialize the tableset. :param encoding: passed on to xlrd.open_workbook function diff --git a/messytables/html.py b/messytables/html.py index 3943c01..2214363 100644 --- a/messytables/html.py +++ b/messytables/html.py @@ -4,16 +4,18 @@ import html5lib import xml.etree.ElementTree as etree + def fromstring(s): tb = html5lib.getTreeBuilder("lxml", implementation=etree) p = html5lib.HTMLParser(tb, namespaceHTMLElements=False) return p.parse(s) + class HTMLTableSet(TableSet): """ A TableSet from a HTML document. """ - def __init__(self, fileobj=None, filename=None, window=None): + def __init__(self, fileobj=None, filename=None, window=None, **kw): if filename is not None: fh = open(filename, 'r') diff --git a/messytables/ods.py b/messytables/ods.py index ca838f0..7b03d74 100644 --- a/messytables/ods.py +++ b/messytables/ods.py @@ -19,6 +19,7 @@ 'date': DateType(None), } + class ODSTableSet(TableSet): """ A wrapper around ODS files. Because they are zipped and the info we want @@ -27,7 +28,7 @@ class ODSTableSet(TableSet): the remote URL. """ - def __init__(self, fileobj, window=None): + def __init__(self, fileobj, window=None, **kw): '''Initialize the object. :param fileobj: may be a file path or a file-like object. Note the diff --git a/messytables/pdf.py b/messytables/pdf.py index 69a0294..4f9052e 100644 --- a/messytables/pdf.py +++ b/messytables/pdf.py @@ -45,7 +45,7 @@ class PDFTableSet(TableSet): """ A TableSet from a PDF document. """ - def __init__(self, fileobj=None, filename=None): + def __init__(self, fileobj=None, filename=None, **kw): if get_tables is None: raise ImportError("pdftables is not installed") if filename is not None: diff --git a/messytables/types.py b/messytables/types.py index bd3021f..7cdc367 100644 --- a/messytables/types.py +++ b/messytables/types.py @@ -83,7 +83,7 @@ def cast(self, value): if value.is_integer(): return int(value) else: - raise ValueError() + raise ValueError('Invalid integer: %s' % value) class DecimalType(CellType): diff --git a/messytables/zip.py b/messytables/zip.py index 3574855..4707d47 100644 --- a/messytables/zip.py +++ b/messytables/zip.py @@ -6,7 +6,7 @@ class ZIPTableSet(messytables.TableSet): """ Reads TableSets from inside a ZIP file """ - def __init__(self, fileobj): + def __init__(self, fileobj, **kw): """ On error it will raise messytables.ReadError. """ @@ -17,7 +17,7 @@ def __init__(self, fileobj): for f in z.infolist(): ext = None - #ignore metadata folders added by Mac OS X + # ignore metadata folders added by Mac OS X if '__MACOSX' in f.filename: continue @@ -26,7 +26,7 @@ def __init__(self, fileobj): try: filetables = messytables.any.any_tableset( - z.open(f), extension=ext) + z.open(f), extension=ext, **kw) except ValueError as e: found.append(f.filename + ": " + e.message) continue @@ -38,5 +38,5 @@ def __init__(self, fileobj): tables (%s).''' % ', '.join(found)) finally: z.close() - + self._tables = tables