Skip to content

Commit

Permalink
Fix: Escape control chars when stringifying JSON (Netflix#492)
Browse files Browse the repository at this point in the history
  • Loading branch information
clay-mayers committed Mar 11, 2024
1 parent 6bd58a7 commit a11ff94
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -409,10 +409,125 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in
}
}

// Bit layout shared by the constants below: an int is treated as three 9-bit
// fields. The low 8 bits of each field hold a copy of the character under
// test; the 9th bit (bits 8, 17 and 26) is a "category" bit that receives the
// carry when that field's detector matches.

// Category flags, one per 9-bit field (the carry position of each field).
private static final int CAT_NP = 0x100;        // bit 8:  char < 0x20 (non-printable)
private static final int CAT_RS = CAT_NP << 9;  // bit 17: reverse solidus
private static final int CAT_DQ = CAT_RS << 9;  // bit 26: double quote
private static final int CATEGORY_BITS_MASK = CAT_NP | CAT_RS | CAT_DQ;

// The value each field is looking for: 0x1f (the low five bits) in field 0,
// '\\' in field 1 and '"' in field 2.
private static final int CHARS2DETECT = 0x1f | ('\\' << 9) | ('"' << 18);

// XOR mask: complement of the detected values with the category bits cleared.
private static final int DETECT_MASK = ~(CHARS2DETECT | CATEGORY_BITS_MASK);

// Multiplying an 8-bit value by this places a copy of it in each 9-bit field.
private static final int DUP3TIMES_WITH_CAT_BIT = 1 | (1 << 9) | (1 << 18);

// Added after the XOR: +1 in fields 1 and 2, +0x20 in field 0.
private static final int CAUSE_CARRY = 0x20 | (1 << 9) | (1 << 18);

/**
 * Classifies four characters in one call and returns the OR of their results.
 *
 * <p>The CAT_* bits of the returned value say which categories were seen; every
 * other bit is garbage and must be masked off by the caller. Only the low 8 bits
 * of each argument are examined, so a character above 0xff whose low byte looks
 * like a detected character is reported as that category.
 *
 * <p>Four characters are taken at a time with the stated aim of limiting CPU
 * pipeline stalls. Callers with fewer than four characters should pad with a
 * character that needs no escaping (e.g. ' ').
 *
 * <p>How it works, per character:
 * <ol>
 *   <li>Multiplying by DUP3TIMES_WITH_CAT_BIT makes three copies of the byte, one
 *       every 9 bits, leaving a spare "carry" bit in front of each copy.</li>
 *   <li>XOR with DETECT_MASK turns a copy into 0xff when it equals the character
 *       that copy is testing for (e.g. 0x5c ^ 0xa3 = 0xff).</li>
 *   <li>Adding CAUSE_CARRY adds 1 to such a copy, which rolls it over to 0 and
 *       carries into the category bit (0xff + 1 = 0x100).</li>
 * </ol>
 * The "below 0x20" test only needs the upper three bits to be zero, so that copy
 * is XORed with 0xe0 and 0x20 is added instead of 1
 * (e.g. (0x0a ^ 0xe0) + 0x20 = 0x10a).
 */
private int categorize(char ch8_1, char ch8_2, char ch8_3, char ch8_4) {
    // Each line is an independent duplicate -> xor -> add chain for one character.
    int lanes1 = (((0xff & ch8_1) * DUP3TIMES_WITH_CAT_BIT) ^ DETECT_MASK) + CAUSE_CARRY;
    int lanes2 = (((0xff & ch8_2) * DUP3TIMES_WITH_CAT_BIT) ^ DETECT_MASK) + CAUSE_CARRY;
    int lanes3 = (((0xff & ch8_3) * DUP3TIMES_WITH_CAT_BIT) ^ DETECT_MASK) + CAUSE_CARRY;
    int lanes4 = (((0xff & ch8_4) * DUP3TIMES_WITH_CAT_BIT) ^ DETECT_MASK) + CAUSE_CARRY;

    return lanes1 | lanes2 | lanes3 | lanes4;
}

/**
 * Scans str four characters at a time and reports which categories were flagged.
 *
 * @param str the text to scan
 * @return the bitwise OR of the CAT_* flags reported for the characters of str,
 *         or 0 when none was reported
 */
private int categorize(String str) {
    final int length = str.length();
    final int aligned = length & ~3;    // largest multiple of four <= length
    int found = 0;
    int pos = 0;

    // Full groups of four characters.
    while (pos < aligned) {
        found |= categorize(str.charAt(pos), str.charAt(pos + 1), str.charAt(pos + 2), str.charAt(pos + 3));
        pos += 4;
    }

    // One to three leftover characters: pad the missing slots with spaces.
    final int remaining = length - aligned;
    if (remaining > 0) {
        final char second = remaining > 1 ? str.charAt(pos + 1) : ' ';
        final char third = remaining > 2 ? str.charAt(pos + 2) : ' ';
        found |= categorize(str.charAt(pos), second, third, ' ');
    }

    // Keep only the category bits; the rest of the accumulated value is garbage.
    return found & CATEGORY_BITS_MASK;
}

/**
 * Rewrites every character below 0x20 in str as a six-character escape:
 * a backslash, the letter 'u', and the code point as four lowercase hex digits.
 * All other characters are copied through unchanged.
 *
 * @param str the text to process
 * @return a new string with the characters below 0x20 escaped
 */
private String escapeNP(String str) {
    final int len = str.length();
    final StringBuilder out = new StringBuilder(2 * len);

    for (int idx = 0; idx < len; idx++) {
        final char ch = str.charAt(idx);
        if (ch >= 0x20) {
            out.append(ch);
            continue;
        }
        // toHexString gives one digit below 0x10 and two digits otherwise,
        // so the zero padding in the prefix differs accordingly.
        out.append(ch < 0x10 ? "\\u000" : "\\u00").append(Integer.toHexString(ch));
    }
    return out.toString();
}

/**
* Escapes JSON's invalid string characters in str
* Works by categorizing the characters in str and then only
* escaping the characters based on the categories recorded.
*/
private String escapeString(String str) {
if (str.indexOf('\\') == -1 && str.indexOf('\"') == -1)
int cat = categorize(str);

if (cat == 0) {
return str;
return str.replace("\\", "\\\\").replace("\"", "\\\"");
}

/* Replace reverse solidus first since subsequent substitutions add more */
if ((cat & CAT_RS) != 0) {
str = str.replace("\\","\\\\");
}
if ((cat & CAT_DQ) != 0) {
str = str.replace("\"","\\\"");
}
if ((cat & CAT_NP) != 0) {
return escapeNP(str);
}
return str;
}

private void appendIndentation(Writer writer, int indentation) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ public void testStringifyTypeWithString() throws IOException {
String msg = "String types should be printed correctly";
Assert.assertEquals(msg, "\"foo\"",
stringifyType(TypeWithString.class, true, false, new TypeWithString("foo")));
Assert.assertEquals(msg, "\"\\\\/foo\\u000a\"",
stringifyType(TypeWithString.class, true, false, new TypeWithString("\\/foo\n")));
Assert.assertEquals(msg, "{" + NEWLINE
+ INDENT + "\"value\": {" + NEWLINE
+ INDENT + INDENT + "\"value\": \"foo\"" + NEWLINE
Expand Down

0 comments on commit a11ff94

Please sign in to comment.