From c96faa76a0437a8ed5788d3cadc56a9035346609 Mon Sep 17 00:00:00 2001 From: Clay Mayers <1301632+clay-mayers@users.noreply.github.com> Date: Mon, 11 Mar 2024 21:50:33 +0000 Subject: [PATCH] Fix: V2 Escape control chars when stringifying JSON (#492) Only process a single character at a time - Processing 4 at a time has more repeatable timing but on average is slower. Processing an unescaped string can be faster than the original method of find/find. --- .../HollowRecordJsonStringifier.java | 48 ++++--------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java b/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java index 92700b8532..174095027c 100644 --- a/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java +++ b/hollow/src/main/java/com/netflix/hollow/tools/stringifier/HollowRecordJsonStringifier.java @@ -423,8 +423,6 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in /** * Returns the category (CAT_*) of ch8 - non-carry bits are garbage - * Takes four characters to limit CPU pipeline stalls. Use non-escaped char - * (e.g ' '), if you don't have all 4 characters. * * Works by making 3 copies of the character every 9 bits leaving a "carry" bit * to hold the category in front of each copy. When detecting a specific char, @@ -434,26 +432,14 @@ private void appendFieldStringify(Writer writer, HollowDataAccess dataAccess, in * (e.g, 0x5c ^ 0xa3 + 1 = 0x100). Detecting < 0x20 is detecting just the upper * 3 bits are 0 so, 0x20 is added instead of 1 (e.g., 0x0a ^ e0 + 0x20 = 0x10a). 
*/ - private int categorize(char ch8_1, char ch8_2, char ch8_3, char ch8_4) { - int ch1 = 0xff & ch8_1; - int ch2 = 0xff & ch8_2; - int ch3 = 0xff & ch8_3; - int ch4 = 0xff & ch8_4; - int c1 = ch1 * DUP3TIMES_WITH_CAT_BIT; - int c2 = ch2 * DUP3TIMES_WITH_CAT_BIT; - int c3 = ch3 * DUP3TIMES_WITH_CAT_BIT; - int c4 = ch4 * DUP3TIMES_WITH_CAT_BIT; - - c1 ^= DETECT_MASK; - c2 ^= DETECT_MASK; - c3 ^= DETECT_MASK; - c4 ^= DETECT_MASK; - c1 += CAUSE_CARRY; - c2 += CAUSE_CARRY; - c3 += CAUSE_CARRY; - c4 += CAUSE_CARRY; - - return c1 | c2 | c3 | c4; + private int categorize(char ch8) { + int ch = 0xff & ch8; + int cat = ch * DUP3TIMES_WITH_CAT_BIT; + + cat ^= DETECT_MASK; + cat += CAUSE_CARRY; + + return cat; } /** @@ -461,25 +447,11 @@ private int categorize(char ch8_1, char ch8_2, char ch8_3, char ch8_4) { */ private int categorize(String str) { int len = str.length(); - int len2 = len & ~3; int cat = 0; int i; - for(i = 0; i < len2 ; i = i + 4) { - cat = cat | categorize(str.charAt(i), str.charAt(i+1), str.charAt(i+2), str.charAt(i+3)); - } - switch(len-len2) { - case 0: - break; - case 1: - cat = cat | categorize(str.charAt(i), ' ', ' ', ' '); - break; - case 2: - cat = cat | categorize(str.charAt(i), str.charAt(i+1), ' ', ' '); - break; - case 3: - cat = cat | categorize(str.charAt(i), str.charAt(i+1), str.charAt(i+2), ' '); - break; + for(i = 0; i < len ; i++) { + cat = cat | categorize(str.charAt(i)); } return cat & CATEGORY_BITS_MASK; }