Consolidate and optimize, remove unnecessary code.

samtools · Dec 17, 2024 · d93218d · d93218d
1 parent ed26bb6
commit d93218d
Show file tree

Hide file tree

Showing 5 changed files with 291 additions and 239 deletions.
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
@@ -11,17 +11,18 @@
 // so we don't have to repeatedly interconvert them when fetching from this list
 
 public class NameTokenisationDecode {
-    // TODO: lift these values to a common location since they're used by the encoder, the decoder, and the tests
+    //TODO: lift these values to a common location since they're used by the encoder, the decoder, and the tests
+    //TODO: once we get clarity on the spec (https://github.com/samtools/hts-specs/issues/802) on how to
+    // calculate this, we should use it to verify the result of decompressing
     // for now, since we're returning a String of all the names (instead of a list, which would be more efficient),
     // use a single byte to separate the names; this particular byte is chosen because the calling code in the CRAM
     // reader for read names already assumes it will be handed a block of '\0' separated names
     public final static byte NAME_SEPARATOR = 0;
     public final static CharSequence LOCAL_NAME_SEPARATOR_CHARSEQUENCE = new String(new byte[] {NAME_SEPARATOR});
-
-    //TODO: once we get clarity on the spec (https://github.com/samtools/hts-specs/issues/802) on how to
-    // calculate this, we should use it to verify the result of decompressing
     public static final int UNCOMPRESSED_LENGTH_ADJUSTMENT = 1;
 
+    public static int DEFAULT_POSITION_ALLOCATION = 30;
+
     // the input must be a ByteBuffer containing the read names, separated by the NAME_SEPARATOR byte, WITHOUT
     // a terminating separator
     public String uncompress(final ByteBuffer inBuffer) {
@@ -54,60 +55,59 @@ private String decodeSingleName(
             final int currentNameIndex) {
         // consult tokenStreams[0, TokenStreams.TOKEN_TYPE] to determine if this name uses dup or diff, and either
         // way, determine the reference name from which we will construct this name
-        final byte referenceType = tokenStreams.getTokenStream(0, TokenStreams.TOKEN_TYPE).get();
-        final ByteBuffer distStream = tokenStreams.getTokenStream(0, referenceType);
+        final byte referenceType = tokenStreams.getStream(0, TokenStreams.TOKEN_TYPE).get();
+        final ByteBuffer distStream = tokenStreams.getStream(0, referenceType);
         final int referenceName = currentNameIndex - distStream.getInt() & 0xFFFFFFFF;
 
         if (referenceType == TokenStreams.TOKEN_DUP) {
-            // propagate the existing tokens for the reference name and use them for this name, in case there is
-            // a future instance of this same name that refers to THIS name's tokens, and then reconstruct and
+            // propagate the existing tokens for the reference name and use them for this name (in case there is
+            // a future instance of this same name that refers to THIS name's tokens), and then reconstruct and
             // return the new name by joining the accumulated tokens
             decodedNameTokens.add(currentNameIndex, decodedNameTokens.get(referenceName));
             return String.join("", decodedNameTokens.get(currentNameIndex));
-        }
-
-        if (referenceType != TokenStreams.TOKEN_DIFF) {
+        } else if (referenceType != TokenStreams.TOKEN_DIFF) {
             throw new CRAMException(String.format(
                     "Invalid nameType %s. nameType must be either TOKEN_DIFF or TOKEN_DUP", referenceType));
         }
 
-        // preallocate for with 30 tokens, but let the list auto-expand if we exceed that
-        final List<String> currentNameTokens = new ArrayList<>(30);
+        // preallocate for DEFAULT_POSITION_ALLOCATION tokens (columns), but let the list auto-expand if we exceed that
+        final List<String> currentNameTokens = new ArrayList<>(DEFAULT_POSITION_ALLOCATION);
         final StringBuilder decodedNameBuilder = new StringBuilder();
         byte type = -1;
+
         // start at position 1; at position 0, there is only nameType information
         for (int tokenPos = 1; type != TokenStreams.TOKEN_END; tokenPos++) {
-            type = tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_TYPE).get();
+            type = tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_TYPE).get();
             String currentToken = "";
 
-            switch(type){
+            switch (type) {
                 case TokenStreams.TOKEN_CHAR:
-                    final char currentTokenChar = (char) tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_CHAR).get();
+                    final char currentTokenChar = (char) tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_CHAR).get();
                     currentToken = String.valueOf(currentTokenChar);
                     break;
                 case TokenStreams.TOKEN_STRING:
-                    currentToken = readString(tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_STRING));
+                    currentToken = readString(tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_STRING));
                     break;
                 case TokenStreams.TOKEN_DIGITS:
                     currentToken = getDigitsToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DIGITS);
                     break;
                 case TokenStreams.TOKEN_DIGITS0:
                     final String digits0Token = getDigitsToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DIGITS0);
-                    final int lenDigits0Token = tokenStreams.getTokenStream(tokenPos, TokenStreams.TOKEN_DZLEN).get() & 0xFF;
+                    final int lenDigits0Token = tokenStreams.getStream(tokenPos, TokenStreams.TOKEN_DZLEN).get() & 0xFF;
                     currentToken = leftPadWith0(digits0Token, lenDigits0Token);
                     break;
                 case TokenStreams.TOKEN_DELTA:
-                    currentToken = getDeltaToken(tokenPos, tokenStreams, TokenStreams.TOKEN_DELTA, decodedNameTokens, referenceName);
+                    currentToken = getDeltaToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DELTA, decodedNameTokens, referenceName);
                     break;
                 case TokenStreams.TOKEN_DELTA0:
-                    final String delta0Token = getDeltaToken(tokenPos, tokenStreams, TokenStreams.TOKEN_DELTA0, decodedNameTokens, referenceName);
+                    final String delta0Token = getDeltaToken(tokenStreams, tokenPos, TokenStreams.TOKEN_DELTA0, decodedNameTokens, referenceName);
                     final int lenDelta0Token = decodedNameTokens.get(referenceName).get(tokenPos-1).length();
                     currentToken = leftPadWith0(delta0Token, lenDelta0Token);
                     break;
                 case TokenStreams.TOKEN_MATCH:
                     currentToken = decodedNameTokens.get(referenceName).get(tokenPos-1);
                     break;
-                case TokenStreams.TOKEN_END: // tolerate END, it terminates the enclosing loop
+                case TokenStreams.TOKEN_END: // tolerate END, it terminates the enclosing loop
                     break;
                 case TokenStreams.TOKEN_NOP:
                     //no-op token, inserted by the writer to take up space to keep the streams aligned
@@ -132,8 +132,8 @@ private String decodeSingleName(
     }
 
     private String getDeltaToken(
-            final int tokenPosition,
             final TokenStreams tokenStreams,
+            final int tokenPosition,
             final byte tokenType,
             final List<List<String>> previousTokensList,
             final int prevNameIndex) {
@@ -145,7 +145,7 @@ private String getDeltaToken(
         try {
             final String prevToken = previousTokensList.get(prevNameIndex).get(tokenPosition - 1);
             int prevTokenInt = Integer.parseInt(prevToken);
-            final int deltaTokenValue = tokenStreams.getTokenStream(tokenPosition, tokenType).get() & 0xFF;
+            final int deltaTokenValue = tokenStreams.getStream(tokenPosition, tokenType).get() & 0xFF;
             return Long.toString(prevTokenInt + deltaTokenValue);
         } catch (final NumberFormatException e) {
             throw new CRAMException(
@@ -163,10 +163,10 @@ private String getDigitsToken(
         if (tokenType != TokenStreams.TOKEN_DIGITS && tokenType != TokenStreams.TOKEN_DIGITS0) {
             throw new CRAMException(
                     String.format(
-                            "Invalid tokenType : %s tokenType must be either TOKEN_DIGITS or TOKEN_DIGITS0",
+                            "Invalid tokenType: %s tokenType must be either TOKEN_DIGITS or TOKEN_DIGITS0",
                             tokenType));
         }
-        final ByteBuffer digitsByteBuffer = tokenStreams.getTokenStream(tokenPosition, tokenType);
+        final ByteBuffer digitsByteBuffer = tokenStreams.getStream(tokenPosition, tokenType);
         final long digits = digitsByteBuffer.getInt() & 0xFFFFFFFFL;
         return Long.toString(digits);
     }
@@ -177,7 +177,7 @@ private String readString(final ByteBuffer inputBuffer) {
         final StringBuilder resultStringBuilder = new StringBuilder();
         byte currentByte = inputBuffer.get();
         while (currentByte != 0) {
-            //TODO: fix this sketchy cast; this will fail on non-ASCII characters
+            //TODO: fix this sketchy cast; bytes >= 0x80 are sign-extended by (char), so non-ASCII characters are not decoded correctly
             resultStringBuilder.append((char) currentByte);
             currentByte = inputBuffer.get();
         }