diff --git a/.gitignore b/.gitignore index fe009cc..f108e2e 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,7 @@ celerybeat-schedule .env # virtualenv +.venv/ venv/ ENV/ diff --git a/tap_spreadsheets_anywhere/format_handler.py b/tap_spreadsheets_anywhere/format_handler.py index ec113fa..6bea333 100644 --- a/tap_spreadsheets_anywhere/format_handler.py +++ b/tap_spreadsheets_anywhere/format_handler.py @@ -32,6 +32,11 @@ def get_streamreader(uri, universal_newlines=True, newline='', open_mode='r', en SCHEME_SEP = "://" kwargs = kwarg_dispatch.get(uri.split(SCHEME_SEP, 1)[0], lambda: {})() + # When reading in binary mode, undefine `encoding`. + # Otherwise, `smart_open` will return a `TextIOWrapper` in `"r"` mode. + # However, reading binary streams needs a `BufferedReader`. + if "b" in open_mode: + encoding = None streamreader = smart_open.open(uri, open_mode, newline=newline, errors='surrogateescape', encoding=encoding, **kwargs) if not universal_newlines and isinstance(streamreader, StreamReader):