From bf308d2d5c769f27a0cd2c42143d64f18ff5ecd8 Mon Sep 17 00:00:00 2001 From: Hans Chalupsky Date: Wed, 20 May 2020 18:56:29 -0700 Subject: [PATCH] build_command.record_key_spec: Handle the fact that sh 1.13 can now give us either strings or bytes depending on the type of file or stream we are processing. --- kgtk/cli/sort.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kgtk/cli/sort.py b/kgtk/cli/sort.py index dd8fbab4e..10147d6d6 100644 --- a/kgtk/cli/sort.py +++ b/kgtk/cli/sort.py @@ -112,12 +112,18 @@ def build_command(input=None, output=None, columns='1', colsep='\t', options='', # define these in here, so we can pass in some process-local variables via closures: def record_key_spec(chunk): + # starting with sh 1.13 it looks like we can get either strings or bytes here; + # if we get bytes we convert to an identical string using `latin1' encoding: + if isinstance(chunk, bytes): + chunk = chunk.decode('latin1') buffer.write(chunk) header = buffer.getvalue() eol = header.find('\n') if eol >= 0: with open(sort_env['KGTK_HEADER'], 'w') as out: out.write(header[0:eol+1]) + # reencode from latin1 to utf8 for header processing: + header = header[0:eol].encode('latin1').decode(zcat.kgtk_encoding) with open(sort_env['KGTK_SORT_KEY_SPEC'], 'w') as out: out.write(build_sort_key_spec(header, columns, colsep)) # this signals to ignore the callback once we are done collecting the header: