diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
index 89c4ef2d3a..51338e488a 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
@@ -11,17 +11,18 @@
 // so we don't have to repeatedly interconvert them when fetching from this list
 
 public class NameTokenisationDecode {
-    // TODO: lift these values to a common location since they're used by the encoder, the decoder, and the tests
+    //TODO: lift these values to a common location since they're used by the encoder, the decoder, and the tests
+    //TODO: once we get clarity on the spec (https://github.com/samtools/hts-specs/issues/802) on how to
+    // calculate this, we should use it to verify the result of decompressing
     // for now, since we're returning a String of all the names (instead of a list, which is more efficient) because,
     // use a single byte to separate the names; this particular byte is chosen because the calling code in the CRAM
     // reader for read names already assumes it will be handed a block of '\0' separated names
     public final static byte NAME_SEPARATOR = 0;
     public final static CharSequence LOCAL_NAME_SEPARATOR_CHARSEQUENCE = new String(new byte[] {NAME_SEPARATOR});
-
-    //TODO: once we get clarity on the spec (https://github.com/samtools/hts-specs/issues/802) on how to
-    // calculate this, we should use it to verify the result of decompressing
     public static final int UNCOMPRESSED_LENGTH_ADJUSTMENT = 1;
 
+    public static int DEFAULT_POSITION_ALLOCATION = 30;
+
     // the input must be a ByteBuffer containing the read names, separated by the NAME_SEPARATOR byte, WITHOUT
     // a terminating separator
     public String uncompress(final ByteBuffer inBuffer) {
@@ -54,60 +55,59 @@ private String decodeSingleName(
             final int currentNameIndex) {
         // consult tokenStreams[0, TokenStreams.TOKEN_TYPE] to determine if this name uses dup or diff, and either
         // way, determine the reference name from which we will construct this name
-        final byte referenceType = tokenStreams.getTokenStream(0, TokenStreams.TOKEN_TYPE).get();
-        final ByteBuffer distStream = tokenStreams.getTokenStream(0, referenceType);
+        final byte referenceType = tokenStreams.getStream(0, TokenStreams.TOKEN_TYPE).get();
+        final ByteBuffer distStream = tokenStreams.getStream(0, referenceType);
         final int referenceName = currentNameIndex - distStream.getInt() & 0xFFFFFFFF;
 
         if (referenceType == TokenStreams.TOKEN_DUP) {
-            // propagate the existing tokens for the reference name and use them for this name, in case there is
-            // a future instance of this same name that refers to THIS name's tokens, and then reconstruct and
+            // propagate the existing tokens for the reference name and use them for this name (in case there is
+            // a future instance of this same name that refers to THIS name's tokens), and then reconstruct and
             // return the new name by joining the accumulated tokens
             decodedNameTokens.add(currentNameIndex, decodedNameTokens.get(referenceName));
             return String.join("", decodedNameTokens.get(currentNameIndex));
-        }
-
-        if (referenceType != TokenStreams.TOKEN_DIFF) {
+        } else if (referenceType != TokenStreams.TOKEN_DIFF) {
             throw new CRAMException(String.format(
                     "Invalid nameType %s. nameType must be either TOKEN_DIFF or TOKEN_DUP", referenceType));
         }
 
-        // preallocate for with 30 tokens, but let the list auto-expand if we exceed that
-        final List<String> currentNameTokens = new ArrayList<>(30);
+        // preallocate for DEFAULT_NUMBER_OF_POSITIONS token (columns), but the list size will auto-expand if we exceed that
+        final List<String> currentNameTokens = new ArrayList<>(DEFAULT_POSITION_ALLOCATION);
         final StringBuilder decodedNameBuilder = new StringBuilder();
         byte type = -1;
+
         // start at position 1; at position 0, there is only nameType information
         for (int tokenPos = 1; type != TokenStreams.TOKEN_END; tokenPos++) {
-            type = tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_TYPE).get();
+            type = tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_TYPE).get();
             String currentToken = "";
 
-            switch(type){
+            switch (type) {
                 case TokenStreams.TOKEN_CHAR:
-                    final char currentTokenChar = (char) tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_CHAR).get();
+                    final char currentTokenChar = (char) tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_CHAR).get();
                     currentToken = String.valueOf(currentTokenChar);
                     break;
                 case TokenStreams.TOKEN_STRING:
-                    currentToken = readString(tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_STRING));
+                    currentToken = readString(tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_STRING));
                     break;
                 case TokenStreams.TOKEN_DIGITS:
                     currentToken = getDigitsToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DIGITS);
                     break;
                 case TokenStreams.TOKEN_DIGITS0:
                     final String digits0Token = getDigitsToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DIGITS0);
-                    final int lenDigits0Token = tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_DZLEN).get() & 0xFF;
+                    final int lenDigits0Token = tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_DZLEN).get() & 0xFF;
                     currentToken = leftPadWith0(digits0Token, lenDigits0Token);
                     break;
                 case TokenStreams.TOKEN_DELTA:
-                    currentToken = getDeltaToken(tokenPos, tokenStreams, TokenStreams.TOKEN_DELTA, decodedNameTokens, referenceName);
+                    currentToken = getDeltaToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DELTA, decodedNameTokens, referenceName);
                     break;
                 case TokenStreams.TOKEN_DELTA0:
-                    final String delta0Token = getDeltaToken(tokenPos, tokenStreams, TokenStreams.TOKEN_DELTA0, decodedNameTokens, referenceName);
+                    final String delta0Token = getDeltaToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DELTA0, decodedNameTokens, referenceName);
                     final int lenDelta0Token = decodedNameTokens.get(referenceName).get(tokenPos-1).length();
                     currentToken = leftPadWith0(delta0Token, lenDelta0Token);
                     break;
                 case TokenStreams.TOKEN_MATCH:
                     currentToken = decodedNameTokens.get(referenceName).get(tokenPos-1);
                     break;
-                case TokenStreams.TOKEN_END: // tolerate END, it terminates the enclosing loop
+                case TokenStreams.TOKEN_END: // tolerate END, it will terminates the enclosing loop
                     break;
                 case TokenStreams.TOKEN_NOP:
                     //no-op token, inserted by the writer to take up space to keep the streams aligned
@@ -132,8 +132,8 @@ private String decodeSingleName(
     }
 
     private String getDeltaToken(
-            final int tokenPosition,
             final TokenStreams tokenStreams,
+            final int tokenPosition,
             final byte tokenType,
             final List<List<String>> previousTokensList,
             final int prevNameIndex) {
@@ -145,7 +145,7 @@ private String getDeltaToken(
         try {
             final String prevToken = previousTokensList.get(prevNameIndex).get(tokenPosition - 1);
             int prevTokenInt = Integer.parseInt(prevToken);
-            final int deltaTokenValue = tokenStreams.getTokenStream(tokenPosition, tokenType).get() & 0xFF;
+            final int deltaTokenValue = tokenStreams.getStream(tokenPosition, tokenType).get() & 0xFF;
             return Long.toString(prevTokenInt + deltaTokenValue);
         } catch (final NumberFormatException e) {
             throw new CRAMException(
@@ -163,10 +163,10 @@ private String getDigitsToken(
         if (tokenType != TokenStreams.TOKEN_DIGITS && tokenType != TokenStreams.TOKEN_DIGITS0) {
             throw new CRAMException(
                     String.format(
-                            "Invalid tokenType : %s tokenType must be either TOKEN_DIGITS or TOKEN_DIGITS0",
+                            "Invalid tokenType: %s tokenType must be either TOKEN_DIGITS or TOKEN_DIGITS0",
                             tokenType));
         }
-        final ByteBuffer digitsByteBuffer = tokenStreams.getTokenStream(tokenPosition, tokenType);
+        final ByteBuffer digitsByteBuffer = tokenStreams.getStream(tokenPosition, tokenType);
         final long digits = digitsByteBuffer.getInt() & 0xFFFFFFFFL;
         return Long.toString(digits);
     }
@@ -177,7 +177,7 @@ private String readString(final ByteBuffer inputBuffer) {
         final StringBuilder resultStringBuilder = new StringBuilder();
         byte currentByte = inputBuffer.get();
         while (currentByte != 0) {
-            //TODO: fix this sketchy cast; this will fail on non-ASCII characters
+            //TODO: fix this sketchy cast; this will fail on non-ASCII characters ?
             resultStringBuilder.append((char) currentByte);
             currentByte = inputBuffer.get();
         }
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
index d29ff697f2..dc084f0cc9 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
@@ -18,13 +18,19 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-//TODO: implement this from the spec: if a byte stream of token types is entirely MATCH apart from the very
-// first value it is discarded. It is possible to regenerate this during decode by observing the other byte streams.
-//TODO:its super wasteful (but simpler) to always store the accumulated tokens as Strings, since thisresults
+//TODO:its super wasteful (but simpler) to always store the accumulated tokens as Strings, since this results
 // int lots of String<-> int interconversions
+//TODO: enforce a maximum of 128 tokens
 
 /**
- * Very naive implementation of a name tokenization encoder
+ * A very naive implementation of a name tokenization encoder.
+ *
+ * It does not currently:
+ *
+ * - recognize and encode for duplicate streams (that is, it does not ever set the DUP_PREVIOUS_STREAM_FLAG_MASK flag)
+ * - detect and encode for streams that are all match, as mentioned in the spec ("if a byte stream of token types
+ *   is entirely MATCH apart from the very first value it is discarded. It is possible to regenerate this during decode
+ *   by observing the other byte streams.")
  */
 public class NameTokenisationEncode {
     private final static String READ_NAME_TOK_REGEX = "([a-zA-Z0-9]{1,9})|([^a-zA-Z0-9]+)";
@@ -36,16 +42,16 @@ public class NameTokenisationEncode {
     private final static String DIGITS_REGEX = "^[0-9]+$";
     private final static Pattern DIGITS_PATTERN = Pattern.compile(DIGITS_REGEX);
 
-    private int maxNumberOfTokens; // the maximum number of tokenised columns seen across all names
-    private int maxLength;
+    private int maxPositions; // the maximum number of tokenised columns seen across all names
+    private int maxStringValueLength; // longest *String* value for any token
 
     /**
      * Output format is the read names, separated by the NAME_SEPARATOR byte, WITHOUT a terminating separator
      */
     public ByteBuffer compress(final ByteBuffer inBuffer, final boolean useArith) {
-        // strictly speaking, keeping these in a list isn't necessary, but we have to scan the entire input
-        // anyway to determine the number of names, since that's the first thing that we need to write to the
-        // output stream, so extract the names while we're scanning
+        // strictly speaking, keeping this list isn't necessary, but since the first thing that we need to write
+        // to the output stream is the number of names, we have to scan the entire input anyway to count them,
+        // so just extract them while we're scanning
         final List<String> namesToEncode = extractInputNames(inBuffer, CRAMEncodingStrategy.DEFAULT_READS_PER_SLICE);
         final int numNames = namesToEncode.size();
         final int uncompressedInputSize = inBuffer.limit() - numNames - NameTokenisationDecode.UNCOMPRESSED_LENGTH_ADJUSTMENT;
@@ -57,134 +63,123 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final boolean useArith) {
         outBuffer.putInt(numNames);
         outBuffer.put((byte)(useArith == true ? 1 : 0));
 
-        // keep a List<List<EncodeToken>>, as we also need to store the TOKEN_TYPE, relative value when compared
-        // to prev name's token along with the token value.
-        final List<List<EncodeToken>> tokenAccumulator = new ArrayList<>(numNames);
-        // note that using a map is a little sketchy here, since read names can be duplicates; but its fine since
-        // it doesn't really matter which index is recorded here - any one will do
+        // note that using a map with a read name key is a little sketchy here, since read names can be duplicates;
+        // but its fine since it doesn't really matter which index is recorded here - any one will suffice
         final HashMap<String, Integer> nameIndexMap = new HashMap<>();
-        final int[] tokenFrequencies = new int[256];
-        for(int nameIndex = 0; nameIndex < numNames; nameIndex++) {
-            tokenAccumulator.add(
+        final int[] tokenFrequencies = new int[256]; // DELTA vs DIGIT frequency
+        // keep track of the list of encoded tokens for each name
+        final List<List<EncodeToken>> encodedTokensByName = new ArrayList<>(numNames);
+
+        for (int nameIndex = 0; nameIndex < numNames; nameIndex++) {
+            encodedTokensByName.add(
                     tokeniseName(
                             namesToEncode.get(nameIndex),
                             nameIndex,
-                            tokenAccumulator,
+                            encodedTokensByName,
                             nameIndexMap,
                             tokenFrequencies)
             );
         }
-        for (int tokenPosition = 0; tokenPosition < maxNumberOfTokens; tokenPosition++) {
-            final List<ByteBuffer> tokenStreamForPosition = fillByteStreamsForPosition(tokenPosition, numNames, tokenAccumulator);
-            serializeByteStreamsFor(tokenStreamForPosition, useArith, outBuffer);
+
+        for (int position = 0; position < maxPositions; position++) {
+            final List<ByteBuffer> streamsForPosition = distributeTokensForPosition(
+                    encodedTokensByName,
+                    position,
+                    numNames);
+            serializeTokenStreams(streamsForPosition, outBuffer, useArith);
         }
 
-        // sets limit to current position and position to '0'
-        //outBuffer.flip();
-        outBuffer.limit(outBuffer.position());
-        outBuffer.position(0);
+        outBuffer.flip(); // set the limit to current position, and reset position to '0'
         return outBuffer;
     }
 
-    // return the token list for a new name
+    // return the encoded token list for a single name
     private List<EncodeToken> tokeniseName(
             final String name,
             final int nameIndex,
-            final List<List<EncodeToken>> tokenAccumulator,
+            final List<List<EncodeToken>> encodedTokensByName,
             final HashMap<String, Integer> nameIndexMap,
             final int[] tokenFrequencies) {
-        // create a new token list for this name, and populate position 0 with the token that indicates whether
-        // the name is a DIFF or DUP
-        final List<EncodeToken> encodedTokens = new ArrayList<>();
+
         if (nameIndexMap.containsKey(name)) {
-            final String indexStr = String.valueOf(nameIndex - nameIndexMap.get(name));
-            encodedTokens.add(0, new EncodeToken(indexStr, indexStr, TokenStreams.TOKEN_DUP));
-            return encodedTokens;
+            // duplicate name, there is no need to tokenise the name, just encode the index of the duplicate
+            final String duplicateIndex = String.valueOf(nameIndex - nameIndexMap.get(name));
+            return List.of(new EncodeToken(TokenStreams.TOKEN_DUP, duplicateIndex));
         }
-        //TODO: why does this list need a 0 or 1 to tell if this is the first token ?
-        final String indexStr = String.valueOf(nameIndex == 0 ? 0 : 1);
-        encodedTokens.add(0,new EncodeToken(indexStr, indexStr, TokenStreams.TOKEN_DIFF));
+
+        final List<EncodeToken> encodedTokens = new ArrayList<>(NameTokenisationDecode.DEFAULT_POSITION_ALLOCATION);
+
+        // if this name is the first name, the diff value must be 0; otherwise for now use a naive
+        // strategy and only/always diff against the (immediately) preceding name
+        encodedTokens.add(0, new EncodeToken(TokenStreams.TOKEN_DIFF, String.valueOf(nameIndex == 0 ? 0 : 1)));
         nameIndexMap.put(name, nameIndex);
 
         // tokenise the current name
         final Matcher matcher = READ_NAME_PATTERN.matcher(name);
-        final List<String> tok = new ArrayList<>();
-        while (matcher.find()) {
-            tok.add(matcher.group());
-        }
-
-        int currMaxLength = 0;
-        for (int i = 0; i < tok.size(); i++) {
-            // In the list of tokens, all the tokens are offset by 1
-            // because at position "0", we have a token that provides info if the name is a DIFF or DUP
-            // token 0 = DIFF vs DUP
-            int tokenIndex = i + 1;
+        for (int i = 1; matcher.find(); i++) {
             byte type = TokenStreams.TOKEN_STRING;
-            final String str = tok.get(i); // absolute value of the token
-            String val = str; // relative value of the token (comparing to prevname's token at the same token position)
+            final String fragmentValue = matcher.group(); // absolute value of the token
+            String relativeValue = fragmentValue; // relative value of the token (comparing to prev name's token at the same token position)
 
-            if (DIGITS0_PATTERN.matcher(str).matches()) {
+            if (DIGITS0_PATTERN.matcher(fragmentValue).matches()) {
                 type = TokenStreams.TOKEN_DIGITS0;
-            } else if (DIGITS_PATTERN.matcher(str).matches()) {
+            } else if (DIGITS_PATTERN.matcher(fragmentValue).matches()) {
                 type = TokenStreams.TOKEN_DIGITS;
-            } else if (str.length() == 1) {
+            } else if (fragmentValue.length() == 1) {
                 type = TokenStreams.TOKEN_CHAR;
             }
 
-            // compare the current token with token from the previous name at the current token's index
-            // if there exists a previous name and a token at the corresponding index of the previous name
-            final int prevNameIndex = nameIndex - 1; // naive implementation where wer always compare against last name only
-            if (prevNameIndex >=0 && tokenAccumulator.get(prevNameIndex).size() > tokenIndex) {
-                final EncodeToken prevToken = tokenAccumulator.get(prevNameIndex).get(tokenIndex);
-                if (prevToken.getActualTokenValue().equals(tok.get(i))) {
+            // compare the current token with token from the previous name (this naive implementation always
+            // compares against last name only)
+            final int prevNameIndex = nameIndex - 1;
+            if (prevNameIndex >= 0 && encodedTokensByName.get(prevNameIndex).size() > i) {
+                //there exists a token at the corresponding position of the previous name
+                final EncodeToken prevToken = encodedTokensByName.get(prevNameIndex).get(i);
+                if (prevToken.getActualValue().equals(fragmentValue)) {
+                    // identical to the previous name's token in this position
                     type = TokenStreams.TOKEN_MATCH;
-                    val = "";
-                } else if (type==TokenStreams.TOKEN_DIGITS
-                        && (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA)) {
-                    int v = Integer.parseInt(val);
-                    int s = Integer.parseInt(prevToken.getActualTokenValue());
-                    int d = v - s;
-                    tokenFrequencies[tokenIndex]++;
-                    if (d >= 0 && d < 256 && tokenFrequencies[tokenIndex] > nameIndex / 2) {
+                    relativeValue = null;
+                } else if (type==TokenStreams.TOKEN_DIGITS &&
+                        (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA)) {
+                    int d = Integer.parseInt(relativeValue) - Integer.parseInt(prevToken.getActualValue());
+                    tokenFrequencies[i]++;
+                    if (d >= 0 && d < 256 && tokenFrequencies[i] > nameIndex / 2) {
                         type = TokenStreams.TOKEN_DELTA;
-                        val = String.valueOf(d);
+                        relativeValue = String.valueOf(d);
                     }
-                } else if (type == TokenStreams.TOKEN_DIGITS0 && prevToken.getActualTokenValue().length() == val.length()
-                        && (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS0 || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA0)) {
-                    int d = Integer.parseInt(val) - Integer.parseInt(prevToken.getActualTokenValue());
-                    tokenFrequencies[tokenIndex]++;
-                    if (d >= 0 && d < 256 && tokenFrequencies[tokenIndex] > nameIndex / 2) {
+                } else if (type == TokenStreams.TOKEN_DIGITS0 &&
+                        prevToken.getActualValue().length() == relativeValue.length() &&
+                        (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS0 || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA0)) {
+                    int d = Integer.parseInt(relativeValue) - Integer.parseInt(prevToken.getActualValue());
+                    tokenFrequencies[i]++;
+                    if (d >= 0 && d < 256 && tokenFrequencies[i] > nameIndex / 2) {
                         type = TokenStreams.TOKEN_DELTA0;
-                        val = String.valueOf(d);
+                        relativeValue = String.valueOf(d);
                     }
                 }
             }
-            encodedTokens.add(new EncodeToken(str, val, type));
-
-            if (currMaxLength < val.length() + 3) {
-                // TODO: check this? Why isn't unint32 case handled?
-                // +3 for integers; 5 -> (Uint32)5 (from htscodecs javascript code)
-                //if (max_len < T[n][t].val.length+3)  // +3 for integers; 5 -> (Uint32)5
-                //    max_len = T[n][t].val.length+3
-                currMaxLength = val.length() + 3;
+            encodedTokens.add(new EncodeToken(type, fragmentValue, relativeValue));
+            if (type == TokenStreams.TOKEN_STRING) {
+                // update our longest value length, adding one for the null terminator we'll embed in the stream
+                maxStringValueLength = Math.max(maxStringValueLength, fragmentValue.length() + 1);
             }
         }
 
-        encodedTokens.add(new EncodeToken("","",TokenStreams.TOKEN_END));
-        final int currMaxToken = encodedTokens.size();
-        if (maxNumberOfTokens < currMaxToken) {
-            maxNumberOfTokens = currMaxToken;
-        }
-        if (maxLength < currMaxLength) {
-            maxLength = currMaxLength;
+        encodedTokens.add(new EncodeToken(TokenStreams.TOKEN_END));
+
+        // keep track of the longest list of encoded tokens (this is basically the number of columns/fragments)
+        // for use by downstream buffer allocations
+        final int currMaxPositions = encodedTokens.size();
+        if (maxPositions < currMaxPositions) {
+            maxPositions = currMaxPositions;
         }
 
         return encodedTokens;
     }
 
+    // extract the individual names from the input buffer and return in a list
     private static List<String> extractInputNames(final ByteBuffer inBuffer, final int preAllocationSize) {
-        final List<String> names = new ArrayList(CRAMEncodingStrategy.DEFAULT_READS_PER_SLICE);
-        // extract the individual names from the input buffer
+        final List<String> names = new ArrayList(preAllocationSize);
         for (int lastPosition = inBuffer.position(); inBuffer.hasRemaining();) {
             final byte currentByte = inBuffer.get();
             if (currentByte == NameTokenisationDecode.NAME_SEPARATOR || inBuffer.position() == inBuffer.limit()) {
@@ -204,87 +199,100 @@ private static List<String> extractInputNames(final ByteBuffer inBuffer, final i
         return names;
     }
 
-    // NOTE: the calling code relies on the fact that on return, the position of each ByteBuffer corresponds to the
-    // end of the corresponding stream
-    private List<ByteBuffer> fillByteStreamsForPosition(
+    // go through all names, appending the encoded token for the current position to the appropriate byte stream
+    // NOTE: the calling code relies on the fact that on return, the position of each ByteBuffer corresponds
+    // to the location of the end of the stream
+    private List<ByteBuffer> distributeTokensForPosition(
+            final List<List<EncodeToken>> tokenAccumulator,
             final int tokenPosition,
-            final int numNames,
-            final List<List<EncodeToken>> tokenAccumulator) {
-        // create the outer list, but don't allocate bytestreams until we actually need them, since this list is
-        // often quite sparse
+            final int numNames) {
+        // create a list, but don't preallocate the actual ByteBuffers until we actually need them (except
+        // for the TOKEN_TYPE stream, which we'll need for sure), since this list can be quite sparse
         final List<ByteBuffer> tokenStreams = Arrays.asList(new ByteBuffer[TokenStreams.TOTAL_TOKEN_TYPES]);
-         // for the token type stream
-        getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_TYPE, numNames); // for the one allocation we'll need for sure
+        // we need 1 byte per name (so, numNames) for the type stream; the distance values go in the
+        // individual streams (dup or diff)
+        getByteBufferFor(tokenStreams, TokenStreams.TOKEN_TYPE, numNames);
 
+        // for each name, push the name's encoded token for this position on to the end of the appropriate byte stream
         for (int n = 0; n < numNames; n++) {
-            final List<EncodeToken> encodedTokensForName = tokenAccumulator.get(n);
-            if (tokenPosition > 0 && encodedTokensForName.get(0).getTokenType() == TokenStreams.TOKEN_DUP) {
+            final List<EncodeToken> tokensForName = tokenAccumulator.get(n);
+            if (tokenPosition > 0 && tokensForName.get(0).getTokenType() == TokenStreams.TOKEN_DUP) {
+                // there is nothing more to do for this name, since it's a duplicate of a previous name
+                // (the index of which was written to the (dup) stream when position 0 was (previously) processed
                 continue;
             }
-            if (encodedTokensForName.size() <= tokenPosition) {
+            if (tokensForName.size() <= tokenPosition) {
+                // no encoded token for this position for this name
                 continue;
             }
-            final EncodeToken encodeToken = encodedTokensForName.get(tokenPosition);
+
+            // write the token type for this name/position to the TOKEN_TYPE stream
+            final EncodeToken encodeToken = tokensForName.get(tokenPosition);
             final byte type = encodeToken.getTokenType();
             tokenStreams.get(TokenStreams.TOKEN_TYPE).put(type);
+
+            // now write out the associated value for any token type that has one
+            // whenever we allocate a new bytebuffer, size it according to the max that it can ever
+            // be; this might be over-allocating, but that's better than under-allocating, and the
+            // stream will be trimmed when it is written out
             switch (type) {
                 case TokenStreams.TOKEN_DIFF:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIFF, numNames)
-                            .putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIFF, numNames * 4)
+                            .putInt(Integer.parseInt(encodeToken.getRelativeValue()));
                     break;
 
                 case TokenStreams.TOKEN_DUP:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DUP, numNames)
-                            .putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DUP, numNames * 4)
+                            .putInt(Integer.parseInt(encodeToken.getRelativeValue()));
                     break;
 
                 case TokenStreams.TOKEN_STRING:
                     writeString(
-                            getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_STRING, numNames),
-                            encodeToken.getRelativeTokenValue()
+                            getByteBufferFor(tokenStreams, TokenStreams.TOKEN_STRING, numNames * maxStringValueLength),
+                            encodeToken.getRelativeValue()
                     );
                     break;
 
                 case TokenStreams.TOKEN_CHAR:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_CHAR, numNames)
-                            .put((byte) encodeToken.getRelativeTokenValue().charAt(0));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_CHAR, numNames * 1)
+                            .put((byte) encodeToken.getRelativeValue().charAt(0));
                     break;
 
                 case TokenStreams.TOKEN_DIGITS:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIGITS, numNames)
-                            .putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIGITS, numNames * 4)
+                            .putInt(Integer.parseInt(encodeToken.getRelativeValue()));
                     break;
 
                 case TokenStreams.TOKEN_DIGITS0:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIGITS0, numNames)
-                            .putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DZLEN, numNames)
-                        .put((byte) encodeToken.getRelativeTokenValue().length());
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DIGITS0, numNames * 4)
+                            .putInt(Integer.parseInt(encodeToken.getRelativeValue()));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DZLEN, numNames)
+                        .put((byte) encodeToken.getRelativeValue().length());
                     break;
 
                 case TokenStreams.TOKEN_DELTA:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DELTA, numNames)
-                            .put((byte)Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    //TODO: test this conversion on numbers > 127 (Int.parseInt to byte)
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DELTA, numNames * 1)
+                            .put((byte)Integer.parseInt(encodeToken.getRelativeValue()));
                     break;
 
                 case TokenStreams.TOKEN_DELTA0:
-                    getOrCreateByteBufferFor(tokenStreams, TokenStreams.TOKEN_DELTA0, numNames)
-                            .put((byte)Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    getByteBufferFor(tokenStreams, TokenStreams.TOKEN_DELTA0, numNames * 1)
+                            .put((byte)Integer.parseInt(encodeToken.getRelativeValue()));
                     break;
 
                 case TokenStreams.TOKEN_NOP:
                 case TokenStreams.TOKEN_MATCH:
                 case TokenStreams.TOKEN_END:
-                    //TODO: do we need to handle these token types here? throwing causes exceptions
-                    //throw new CRAMException("Invalid token type: " + type);
+                    // noop - token types with no associated value
                     break;
 
                 default:
                     throw new CRAMException("Invalid token type: " + type);
             }
         }
-        // we need to set the limit on these streams so that the downstream consumer does't try to consume
-        // the entire stream
+        // since we've over-allocated these streams,we need to set the limit on these streams so that the downstream
+        // consumer doesn't try to consume the entire stream,
         for (int i = 0; i < TokenStreams.TOTAL_TOKEN_TYPES; i++) {
             if (tokenStreams.get(i) != null) {
                 tokenStreams.get(i).limit(tokenStreams.get(i).position());
@@ -294,12 +302,14 @@ private List<ByteBuffer> fillByteStreamsForPosition(
         return tokenStreams;
     }
 
-    private ByteBuffer getOrCreateByteBufferFor(
+    // since we don't want to allocate a ByteBuffer that we'll never use, allocate them just-in-time
+    // as need
+    private ByteBuffer getByteBufferFor(
             final List<ByteBuffer> tokenStreams,
             final int tokenType,
-            final int numNames) {
+            final int requiredSize) {
         if (tokenStreams.get(tokenType) == null) {
-            tokenStreams.set(tokenType, CompressionUtils.allocateByteBuffer(maxLength * numNames));
+            tokenStreams.set(tokenType, CompressionUtils.allocateByteBuffer(requiredSize));
         }
         return tokenStreams.get(tokenType);
     }
@@ -337,7 +347,6 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
                 final RangeEncode rangeEncode = new RangeEncode();
                 nameTokenStream.rewind();
                 final ByteBuffer tmpByteBuffer = rangeEncode.compress(nameTokenStream, new RangeParams(rangeEncoderFlagSet));
-                //TODO: using "remaining" is a pretty sketchy way to do this, since it assumes that the buffer's limit is meaningful
                 if (bestCompressedLength > tmpByteBuffer.limit()) {
                     bestCompressedLength = tmpByteBuffer.limit();
                     compressedByteBuffer = tmpByteBuffer;
@@ -374,17 +383,18 @@ private static ByteBuffer tryCompress(final ByteBuffer nameTokenStream, final bo
         return compressedByteBuffer;
     }
 
-    private void serializeByteStreamsFor(
+    private void serializeTokenStreams(
             final List<ByteBuffer> tokenStreams,
-            final boolean useArith,
-            final ByteBuffer outBuffer) {
+            final ByteBuffer outBuffer,
+            final boolean useArith) {
         // Compress and serialise the non-null tokenStreams
-        for (int tokenType = 0; tokenType <= TokenStreams.TOKEN_END; tokenType++) {
-            final ByteBuffer tokenBytes = tokenStreams.get(tokenType);
+        for (int tokenStreamType = 0; tokenStreamType <= TokenStreams.TOKEN_END; tokenStreamType++) {
+            final ByteBuffer tokenBytes = tokenStreams.get(tokenStreamType);
             if (tokenBytes != null && tokenBytes.position() > 0) {
-                outBuffer.put((byte) (tokenType + ((tokenType == 0) ? 128 : 0))); //TODO: check this for sign bit correctness
+                // if this encoder was aware of duplicate streams, we would need to detect and encode them
+                // here, and set the DUP_PREVIOUS_STREAM_FLAG_MASK bit
+                outBuffer.put((byte) (tokenStreamType | (tokenStreamType == 0 ? TokenStreams.NEW_POSITION_FLAG_MASK : 0)));
                 final ByteBuffer tempOutByteBuffer = tryCompress(tokenBytes, useArith);
-                //TODO: the use of limit is sketchy here...
                 CompressionUtils.writeUint7(tempOutByteBuffer.limit(), outBuffer);
                 outBuffer.put(tempOutByteBuffer);
             }
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
index 8a3e1789a8..e463181fd0 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
@@ -25,9 +25,9 @@ public class TokenStreams {
 
     public static final int TOTAL_TOKEN_TYPES = 13;
 
-    private static final int NEW_POSITION_FLAG_MASK = 0x80;
-    private static final int DUP_PREVIOUS_STREAM_FLAG_MASK = 0x40;
-    private static final int TYPE_TOKEN_FLAG_MASK = 0x3F;
+    public static final byte TYPE_TOKEN_FLAG_MASK = 0x3F;
+    public static final byte DUP_PREVIOUS_STREAM_FLAG_MASK = 0x40;
+    public static final byte NEW_POSITION_FLAG_MASK = (byte) 0x80;
 
     // choose an initial estimate of the number of expected token positions, which we use to preallocate lists
     private static final int DEFAULT_NUMBER_OF_TOKEN_POSITIONS = 30;
@@ -51,8 +51,8 @@ public TokenStreams(final ByteBuffer inputByteBuffer, final int useArith, final
         int tokenPosition = -1;
         while (inputByteBuffer.hasRemaining()) {
             final byte tokenTypeFlags = inputByteBuffer.get();
-            final boolean startNewPosition = ((tokenTypeFlags & NEW_POSITION_FLAG_MASK) != 0);
-            final boolean isDupStream = ((tokenTypeFlags & DUP_PREVIOUS_STREAM_FLAG_MASK) != 0);
+            final boolean startNewPosition = (tokenTypeFlags & NEW_POSITION_FLAG_MASK) != 0;
+            final boolean isDupStream = (tokenTypeFlags & DUP_PREVIOUS_STREAM_FLAG_MASK) != 0;
             final int tokenType = tokenTypeFlags & TYPE_TOKEN_FLAG_MASK;
             if (tokenType < 0 || tokenType > TOKEN_END) {
                 throw new CRAMException("Invalid name tokenizer token stream type: " + tokenType);
@@ -100,70 +100,70 @@ public TokenStreams(final ByteBuffer inputByteBuffer, final int useArith, final
                     final RANSNx16Decode ransDecode = new RANSNx16Decode();
                     uncompressedTokenStream = ransDecode.uncompress(ByteBuffer.wrap(compressedTokenStream));
                 }
-                getTokenStreamsForPos(tokenPosition)[tokenType] = uncompressedTokenStream;
+                getStreamsForPos(tokenPosition)[tokenType] = uncompressedTokenStream;
             }
         }
         //displayTokenStreamSizes();
     }
 
-//    private void displayTokenStreamSizes() {
-//        for (int t = 0; t < TOTAL_TOKEN_TYPES; t++) {
-//            System.out.print(String.format("%s ", typeToString(t)));
-//        }
-//        System.out.println();
-//        for (int pos = 0; pos < tokenStreams.length; pos++) {
-//            System.out.print(String.format("Pos %2d: ", pos));
-//            for (int typ = 0; typ < tokenStreams[pos].length; typ++) {
-//                final ByteBuffer bf = tokenStreams[pos][typ];
-//                if (bf == null) {
-//                    System.out.print(String.format("%8s", "null"));
-//                } else {
-//                    System.out.print(String.format("%8d", bf.limit()));
-//                }
-//            }
-//            System.out.println();
-//        }
-//    }
-//
-//   private String typeToString(int i) {
-//        switch (i) {
-//            case TOKEN_TYPE:
-//                return "TOKEN_TYPE";
-//            case TOKEN_STRING:
-//                return "TOKEN_STRING";
-//            case TOKEN_CHAR:
-//                return "TOKEN_CHAR";
-//            case TOKEN_DIGITS0:
-//                return "TOKEN_DIGITS0";
-//            case TOKEN_DZLEN:
-//                return "TOKEN_DZLEN";
-//            case TOKEN_DUP:
-//                return "TOKEN_DUP";
-//            case TOKEN_DIFF:
-//                return "TOKEN_DIFF";
-//            case TOKEN_DIGITS:
-//                return "TOKEN_DIGITS";
-//            case TOKEN_DELTA:
-//                return "TOKEN_DELTA";
-//            case TOKEN_DELTA0:
-//                return "TOKEN_DELTA0";
-//            case TOKEN_MATCH:
-//                return "TOKEN_MATCH";
-//            case TOKEN_END:
-//                return "TOKEN_END";
-//            case TOKEN_NOP:
-//                return "NOP";
-//            default:
-//                throw new CRAMException("Invalid name tokenizer tokenType: " + i);
-//        }
-//    }
+    private void displayTokenStreamSizes() {
+        for (int t = 0; t < TOTAL_TOKEN_TYPES; t++) {
+            System.out.print(String.format("%s ", typeToString(t)));
+        }
+        System.out.println();
+        for (int pos = 0; pos < tokenStreams.length; pos++) {
+            System.out.print(String.format("Pos %2d: ", pos));
+            for (int typ = 0; typ < tokenStreams[pos].length; typ++) {
+                final ByteBuffer bf = tokenStreams[pos][typ];
+                if (bf == null) {
+                    System.out.print(String.format("%8s", "null"));
+                } else {
+                    System.out.print(String.format("%8d", bf.limit()));
+                }
+            }
+            System.out.println();
+        }
+    }
+
+   private String typeToString(int i) {
+        switch (i) {
+            case TOKEN_TYPE:
+                return "TOKEN_TYPE";
+            case TOKEN_STRING:
+                return "TOKEN_STRING";
+            case TOKEN_CHAR:
+                return "TOKEN_CHAR";
+            case TOKEN_DIGITS0:
+                return "TOKEN_DIGITS0";
+            case TOKEN_DZLEN:
+                return "TOKEN_DZLEN";
+            case TOKEN_DUP:
+                return "TOKEN_DUP";
+            case TOKEN_DIFF:
+                return "TOKEN_DIFF";
+            case TOKEN_DIGITS:
+                return "TOKEN_DIGITS";
+            case TOKEN_DELTA:
+                return "TOKEN_DELTA";
+            case TOKEN_DELTA0:
+                return "TOKEN_DELTA0";
+            case TOKEN_MATCH:
+                return "TOKEN_MATCH";
+            case TOKEN_END:
+                return "TOKEN_END";
+            case TOKEN_NOP:
+                return "NOP";
+            default:
+                throw new CRAMException("Invalid name tokenizer tokenType: " + i);
+        }
+    }
 
-    public ByteBuffer[] getTokenStreamsForPos(final int pos) {
+    public ByteBuffer[] getStreamsForPos(final int pos) {
         return tokenStreams[pos];
     }
 
-    public ByteBuffer getTokenStream(final int tokenPosition, final int tokenType) {
-        return getTokenStreamsForPos(tokenPosition)[tokenType];
+    public ByteBuffer getStream(final int tokenPosition, final int tokenType) {
+        return getStreamsForPos(tokenPosition)[tokenType];
     }
 
     private int initializeTokenStreams(final int preallocatedPositions) {
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
index 47f05b20cc..623c5cdb06 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
@@ -1,27 +1,65 @@
 package htsjdk.samtools.cram.compression.nametokenisation.tokens;
 
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.cram.compression.nametokenisation.TokenStreams;
+
 public class EncodeToken {
 
-    private String actualTokenValue;
-    private String relativeTokenValue;
     private byte tokenType;
+    private String actualValue;
+    private String relativeValue;
 
-    //TODO: its super wasteful to always store strings for these, since they're often ints
-    public EncodeToken(final String str, final String val, final byte type) {
-        this.actualTokenValue = str;
-        this.relativeTokenValue = val;
-        this.tokenType = type;
+    /**
+     * Token type TOKEN_END has no associated value at all.
+     * @param typeTokenEnd - must be TokenStreams.TOKEN_END
+     */
+    public EncodeToken(final byte typeTokenEnd) {
+        this(typeTokenEnd, null, null);
+        if (typeTokenEnd != TokenStreams.TOKEN_END) {
+            throw new CRAMException("This constructor should only be used for token type TOKEN_END");
+        }
     }
 
-    public String getActualTokenValue() {
-        return actualTokenValue;
+    /**
+     * Token types TOKEN_DIFF and TOKEN_DUP have a relative value, but no absolute value, since they
+     * don't represent a token fragment at a particular position, but rather a token that dictates
+     * which previous read to use as a diff reference.
+     * @param type
+     */
+    public EncodeToken(final byte type, final String relativeValue) {
+        this(type, null, relativeValue);
+        if (tokenType != TokenStreams.TOKEN_DIFF && tokenType != TokenStreams.TOKEN_DUP) {
+            throw new CRAMException("This constructor should only be used for token types TOKEN_DIFF and TOKEN_DUP");
+        }
     }
 
-    public String getRelativeTokenValue() {
-        return relativeTokenValue;
+    /**
+     * Token types TOKEN_DELTA, TOKEN_DELTA0, TOKEN_DIGITS, TOKEN_DIGITS0 all have a relative value that
+     * differs from the actual value of the original fragment, and for those token types, we need to preserve
+     * both during the encoding process. We need to preserve the absolute value for so we can use it to detect
+     * duplicate fragments in downstream reads, and we need to preserve the relative value since, if it's
+     * present, that's what we emit to the output stream.
+     *
+     * @param type
+     * @param actualValue
+     * @param relativeValue
+     */
+    public EncodeToken(final byte type, final String actualValue, final String relativeValue) {
+        this.tokenType = type;
+        this.actualValue = actualValue;
+        this.relativeValue = relativeValue;
     }
 
     public byte getTokenType() {
         return tokenType;
     }
+
+    public String getActualValue() {
+        return actualValue;
+    }
+
+    public String getRelativeValue() {
+        return relativeValue;
+    }
+
 }
\ No newline at end of file
diff --git a/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java b/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java
index f086d8c298..5167c4a6d1 100644
--- a/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java
+++ b/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java
@@ -8,17 +8,20 @@
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 public class NameTokenisationTest extends HtsjdkTest {
 
     private static class TestDataEnvelope {
         public final byte[] testArray;
-        public TestDataEnvelope(final byte[] testdata) {
+        public final boolean useArith;
+        public TestDataEnvelope(final byte[] testdata, final boolean useArith) {
             this.testArray = testdata;
+            this.useArith = useArith;
         }
         public String toString() {
-            return String.format("Array of size %d", testArray.length);
+            return String.format("Array of size %d/%b", testArray.length, useArith);
         }
     }
 
@@ -74,9 +77,10 @@ public Object[][] getNameTokenisationTestData() {
                 "H0164ALXX140820:2:1101:15118:25288"); //TODO: no trailing separator...for now
 
         final List<Object[]> testCases = new ArrayList<>();
-        for (String readName : readNamesList) {
-            Object[] objects = new Object[] { new TestDataEnvelope(readName.getBytes()) };
-            testCases.add(objects);
+        for (final String readName : readNamesList) {
+            for (boolean useArith : Arrays.asList(true, false)) {
+                testCases.add(new Object[] { new TestDataEnvelope(readName.getBytes(), useArith) });
+            }
         }
         return testCases.toArray(new Object[][]{});
     }
@@ -87,7 +91,7 @@ public void testRoundTrip(final TestDataEnvelope td) {
         final NameTokenisationDecode nameTokenisationDecode = new NameTokenisationDecode();
 
         final ByteBuffer uncompressedBuffer = ByteBuffer.wrap(td.testArray);
-        final ByteBuffer compressedBuffer = nameTokenisationEncode.compress(uncompressedBuffer, false);
+        final ByteBuffer compressedBuffer = nameTokenisationEncode.compress(uncompressedBuffer, td.useArith);
         final String decompressedNames = nameTokenisationDecode.uncompress(compressedBuffer);
         final ByteBuffer decompressedNamesBuffer = StandardCharsets.UTF_8.encode(decompressedNames);
         uncompressedBuffer.rewind();