Skip to content

Commit

Permalink
Fix: V2 Escape control chars when stringifying JSON (Netflix#492)
Browse files Browse the repository at this point in the history
Only process a single character at a time - Processing 4 at
a time has more repeatable timing but on average is slower.

Processing an unescaped string can be faster than the original
method of find/find.
  • Loading branch information
clay-mayers committed Mar 11, 2024
1 parent a11ff94 commit c96faa7
Showing 1 changed file with 10 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -423,8 +423,6 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in

/**
* Returns the category (CAT_*) of ch8 - non-carry bits are garbage
* Takes four characters to limit CPU pipeline stalls. Use non-escaped char
* (e.g ' '), if you don't have all 4 characters.
*
* Works by making 3 copies of the character every 9 bits leaving a "carry" bit
* to hold the category in front of each copy. When detecting a specific char,
Expand All @@ -434,52 +432,26 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in
* (e.g, 0x5c ^ 0xa3 + 1 = 0x100). Detecting < 0x20 is detecting just the upper
* 3 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ e0 + 0x20 = 0x10a).
*/
private int categorize(char ch8_1, char ch8_2, char ch8_3, char ch8_4) {
int ch1 = 0xff & ch8_1;
int ch2 = 0xff & ch8_2;
int ch3 = 0xff & ch8_3;
int ch4 = 0xff & ch8_4;
int c1 = ch1 * DUP3TIMES_WITH_CAT_BIT;
int c2 = ch2 * DUP3TIMES_WITH_CAT_BIT;
int c3 = ch3 * DUP3TIMES_WITH_CAT_BIT;
int c4 = ch4 * DUP3TIMES_WITH_CAT_BIT;

c1 ^= DETECT_MASK;
c2 ^= DETECT_MASK;
c3 ^= DETECT_MASK;
c4 ^= DETECT_MASK;
c1 += CAUSE_CARRY;
c2 += CAUSE_CARRY;
c3 += CAUSE_CARRY;
c4 += CAUSE_CARRY;

return c1 | c2 | c3 | c4;
private int categorize(char ch8) {
    // Only the low 8 bits of the character take part in the detection.
    int bits = (ch8 & 0xff) * DUP3TIMES_WITH_CAT_BIT;

    // XOR against the detection mask, then add so that a match carries into
    // the category bit in front of its copy; the remaining bits are garbage.
    bits ^= DETECT_MASK;
    bits += CAUSE_CARRY;
    return bits;
}

/**
* Returns the categories found in str (or of CAT_*)
*/
private int categorize(String str) {
int len = str.length();
int len2 = len & ~3;
int cat = 0;
int i;

for(i = 0; i < len2 ; i = i + 4) {
cat = cat | categorize(str.charAt(i), str.charAt(i+1), str.charAt(i+2), str.charAt(i+3));
}
switch(len-len2) {
case 0:
break;
case 1:
cat = cat | categorize(str.charAt(i), ' ', ' ', ' ');
break;
case 2:
cat = cat | categorize(str.charAt(i), str.charAt(i+1), ' ', ' ');
break;
case 3:
cat = cat | categorize(str.charAt(i), str.charAt(i+1), str.charAt(i+2), ' ');
break;
for(i = 0; i < len ; i = i++) {
cat = cat | categorize(str.charAt(i));
}
return cat & CATEGORY_BITS_MASK;
}
Expand Down

0 comments on commit c96faa7

Please sign in to comment.