Skip to content

Commit

Permalink
Fix: V3 Escape control chars when stringifying JSON (Netflix#492)
Browse files Browse the repository at this point in the history
Properly handle UTF-16
  • Loading branch information
clay-mayers committed Mar 12, 2024
1 parent c96faa7 commit b4a080d
Showing 1 changed file with 31 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -409,70 +409,67 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in
}
}

private final static int CHARS2DETECT =
('"' << (2*9)) | // " DQ
('\\' << 9) | // \ RS
0x1f; // < 0x20 (non-print)
private final static int CAT_DQ = 1 << (3*9 - 1);
private final static int CAT_RS = 1 << (2*9 - 1);
private final static int CAT_NP = 1 << (9 - 1);
private final static int CATEGORY_BITS_MASK = CAT_DQ | CAT_RS | CAT_NP;
private final static int DETECT_MASK = ~CHARS2DETECT & ~CATEGORY_BITS_MASK;
private final static int DUP3TIMES_WITH_CAT_BIT= 0x40201;
private final static int CAUSE_CARRY = 0x40220;
private final static long CHARS2DETECT =
(0x22L << (2*17)) | // " DQ
(0x5cL << 17) | // \ RS
0x1fL; // < 0x20 (non-print)
private final static long CAT_DQ = 1L << (3*17 - 1);
private final static long CAT_RS = 1L << (2*17 - 1);
private final static long CAT_NP = 1L << (17 - 1);
private final static long CATEGORY_BITS_MASK = CAT_DQ | CAT_RS | CAT_NP;
private final static long DETECT_MASK = ~CHARS2DETECT & ~CATEGORY_BITS_MASK;
private final static long DUP3TIMES_WITH_CAT_BIT= 1 | (1 << 17) | (1L << (17*2));
private final static long CAUSE_CARRY = 0x20 | (1 << 17) | (1L << (17*2));

/**
* Returns the category (CAT_*) of ch8 - non-carry bits are garbage
*
* Works by making 3 copies of the character every 9 bits leaving a "carry" bit
* Works by making 3 copies of the character every 17 bits leaving a "carry" bit
* to hold the category in front of each copy. When detecting a specific char,
* the xor mask is the 1's complement of the char to detect, which will set the
* test char to 0xff when it matches. Adding 1 to the 0xff will clear 0xff to 0,
* test char to 0xffff when it matches. Adding 1 to the 0xffff will clear 0xffff to 0,
* and "carry" over into the category bit recording the test character was detected.
* (e.g, 0x5c ^ 0xa3 + 1 = 0x100). Detecting < 0x20 is detecting just the upper
* 3 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ e0 + 0x20 = 0x10a).
* (e.g, 0x5c ^ 0xffa3 + 1 = 0x10000). Detecting < 0x20 is detecting just the upper
* 11 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ ffe0 + 0x20 = 0x1000a).
*/
private int categorize(char ch8) {
int ch = 0xff & ch8;
int cat = ch * DUP3TIMES_WITH_CAT_BIT;
private static long categorize(char ch) {
long cat = ch * DUP3TIMES_WITH_CAT_BIT;

cat ^= DETECT_MASK;
cat += CAUSE_CARRY;

return cat;
}
}

/**
* Returns the categories found in str (or of CAT_*)
*/
private int categorize(String str) {
int len = str.length();
int cat = 0;
int i;
private static long categorize(String string) {
int len = string.length();
long cat = 0;

for(i = 0; i < len ; i = i++) {
cat = cat | categorize(str.charAt(i));
for(int i = 0; i < len ; i++) {
cat = cat | categorize(string.charAt(i));
}
return cat & CATEGORY_BITS_MASK;
}
}

/**
* Escapes the NP characters in str (eg. NUL becomes \u0000)
* Escapes the NP characters in str (eg. \n becomes \u000a)
*/
private String escapeNP(String str) {
char c = 0;
int len = str.length();
StringBuilder sb = new StringBuilder(2*len);

for (int i = 0; i < len; i += 1) {
c = str.charAt(i);
if (c < 0x10) {
char c = str.charAt(i);

if (c < 0x10)
sb.append("\\u000" + Integer.toHexString(c));
} else if (c < 0x20 ) {
else if (c < 0x20)
sb.append("\\u00" + Integer.toHexString(c));
} else {
else
sb.append(c);
}
}
return sb.toString();
}
Expand All @@ -483,7 +480,7 @@ private String escapeNP(String str) {
* escaping the characters based on the categories recorded.
*/
private String escapeString(String str) {
int cat = categorize(str);
long cat = categorize(str);

if (cat == 0) {
return str;
Expand Down

0 comments on commit b4a080d

Please sign in to comment.