Skip to content

Commit

Permalink
UTF-8 characters from Marketo are improperly decoded (singer-io#74)
Browse files Browse the repository at this point in the history
  • Loading branch information
mps-machine committed Jul 29, 2021
1 parent 3c7f622 commit 1f9171e
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions tap_marketo/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,17 @@ def wait_for_export(client, state, stream, export_id):
# This function has an issue with UTF-8 data, most likely caused by `decode_unicode=True`.
# See https://github.com/singer-io/tap-marketo/pull/51/files
def stream_rows(client, stream_type, export_id):
with tempfile.NamedTemporaryFile(mode="w+", encoding="utf8") as csv_file:
with tempfile.NamedTemporaryFile(mode="w+", encoding="utf8", delete=False) as csv_file:
singer.log_info("Download starting.")
resp = client.stream_export(stream_type, export_id)
resp.encoding = 'utf-8'
for chunk in resp.iter_content(chunk_size=CHUNK_SIZE_BYTES, decode_unicode=True):
if chunk:
# Replace CR
chunk = chunk.replace('\r', '')
csv_file.write(chunk)

singer.log_info("Download completed. Begin streaming rows.")
singer.log_info("Download completed. Begin streaming rows to file: " + csv_file.name)
csv_file.seek(0)

reader = csv.reader((line.replace('\0', '') for line in csv_file), delimiter=',', quotechar='"')
Expand Down

0 comments on commit 1f9171e

Please sign in to comment.