From b4a080d2210a9491eb8ef29b21dc4884bb7ff90b Mon Sep 17 00:00:00 2001 From: Clay Mayers <1301632+clay-mayers@users.noreply.github.com> Date: Tue, 12 Mar 2024 04:10:25 +0000 Subject: [PATCH] Fix: V3 Escape control chars when stringifing JSON (#492) Properly handle UTF-16 --- .../HollowRecordJsonStringifier.java | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java b/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java index 174095027c..f1185faf14 100644 --- a/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java +++ b/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java @@ -409,70 +409,67 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in } } - private final static int CHARS2DETECT = - ('"' << (2*9)) | // " DQ - ('\\' << 9) | // \ RS - 0x1f; // < 0x20 (non-print) - private final static int CAT_DQ = 1 << (3*9 - 1); - private final static int CAT_RS = 1 << (2*9 - 1); - private final static int CAT_NP = 1 << (9 - 1); - private final static int CATEGORY_BITS_MASK = CAT_DQ | CAT_RS | CAT_NP; - private final static int DETECT_MASK = ~CHARS2DETECT & ~CATEGORY_BITS_MASK; - private final static int DUP3TIMES_WITH_CAT_BIT= 0x40201; - private final static int CAUSE_CARRY = 0x40220; + private final static long CHARS2DETECT = + (0x22L << (2*17)) | // " DQ + (0x5cL << 17) | // \ RS + 0x1fL; // < 0x20 (non-print) + private final static long CAT_DQ = 1L << (3*17 - 1); + private final static long CAT_RS = 1L << (2*17 - 1); + private final static long CAT_NP = 1L << (17 - 1); + private final static long CATEGORY_BITS_MASK = CAT_DQ | CAT_RS | CAT_NP; + private final static long DETECT_MASK = ~CHARS2DETECT & ~CATEGORY_BITS_MASK; + private final static long DUP3TIMES_WITH_CAT_BIT= 1 | (1 << 17) | (1L << (17*2)); + private final static long CAUSE_CARRY = 0x20 | (1 << 17) | (1L << (17*2)); /** * Returns the category (CAT_*) of ch8 - non-carry bits are garbage * - * Works by making 3 copies of the character every 9 bits leaving a "carry" bit + * Works by making 3 copies of the character every 17 bits leaving a "carry" bit * to hold the category in front of each copy. When detecting a specific char, * the xor mask is the 1's complement of the char to detect, which will set the - * test char to 0xff when it matches. Adding 1 to the 0xff will clear 0xff to 0, + * test char to 0xffff when it matches. Adding 1 to the 0xffff will clear 0xffff to 0, * and "carry" over into the category bit recording the test character was detected. - * (e.g, 0x5c ^ 0xa3 + 1 = 0x100). Detecting < 0x20 is detecting just the upper - * 3 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ e0 + 0x20 = 0x10a). + * (e.g, 0x5c ^ 0xffa3 + 1 = 0x10000). Detecting < 0x20 is detecting just the upper + * 11 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ ffe0 + 0x20 = 0x1000a). */ - private int categorize(char ch8) { - int ch = 0xff & ch8; - int cat = ch * DUP3TIMES_WITH_CAT_BIT; + private static long categorize(char ch) { + long cat = ch * DUP3TIMES_WITH_CAT_BIT; cat ^= DETECT_MASK; cat += CAUSE_CARRY; return cat; - } +} /** * Returns the categories found in str (or of CAT_*) */ - private int categorize(String str) { - int len = str.length(); - int cat = 0; - int i; + private static long categorize(String string) { + int len = string.length(); + long cat = 0; - for(i = 0; i < len ; i = i++) { - cat = cat | categorize(str.charAt(i)); + for(int i = 0; i < len ; i++) { + cat = cat | categorize(string.charAt(i)); } return cat & CATEGORY_BITS_MASK; - } +} /** - * Escapes the NP characters in str (eg. NUL becomes \u0000) + * Escapes the NP characters in str (eg. \n becomes \u000a) */ private String escapeNP(String str) { - char c = 0; int len = str.length(); StringBuilder sb = new StringBuilder(2*len); for (int i = 0; i < len; i += 1) { - c = str.charAt(i); - if (c < 0x10) { + char c = str.charAt(i); + + if (c < 0x10) sb.append("\\u000" + Integer.toHexString(c)); - } else if (c < 0x20 ) { + else if (c < 0x20) sb.append("\\u00" + Integer.toHexString(c)); - } else { + else sb.append(c); - } } return sb.toString(); } @@ -483,7 +480,7 @@ private String escapeNP(String str) { * escaping the characters based on the categories recorded. */ private String escapeString(String str) { - int cat = categorize(str); + long cat = categorize(str); if (cat == 0) { return str;