Skip to content

Commit

Permalink
Bug 36350397 - [36349154->24.03] Add support for deserialization of 4…
Browse files Browse the repository at this point in the history
…-byte UTF-8 sequences

[git-p4: depot-paths = "//dev/coherence-ce/main/": change = 107214]
  • Loading branch information
aseovic committed Feb 29, 2024
1 parent 4ac959e commit a71e0b5
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2023, Oracle and/or its affiliates.
* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* https://oss.oracle.com/licenses/upl.
Expand Down Expand Up @@ -103,6 +103,7 @@
import java.net.URL;

import java.nio.BufferOverflowException;
import java.nio.charset.StandardCharsets;

import java.security.AccessController;
import java.security.PrivilegedAction;
Expand Down Expand Up @@ -1409,8 +1410,8 @@ public static String convertUTF(byte[] ab, int of, int cb, char[] ach)
int ofEnd = of + cb;
for ( ; ofAsc < ofEnd; ++ofAsc)
{
int n = ab[ofAsc] & 0xFF;
if (n >= 0x80)
int n = ab[ofAsc];
if (n < 0)
{
// it's not all "ascii" data
fAscii = false;
Expand Down Expand Up @@ -1462,14 +1463,42 @@ public static String convertUTF(byte[] ab, int of, int cb, char[] ach)
break;
}

case 0xF:
{
// 4-byte format: 1111 xxxx, 10xx xxxx, 10xx xxxx, 10xx xxxx (supplemental plane)
int ch2 = ab[++ofAsc] & 0xFF;
int ch3 = ab[++ofAsc] & 0xFF;
int ch4 = ab[++ofAsc] & 0xFF;
if ((ch2 & 0xC0) != 0x80 || (ch3 & 0xC0) != 0x80 || (ch4 & 0xC0) != 0x80)
{
throw new UTFDataFormatException();
}

int cp = (ch & 0x07) << 18 |
(ch2 & 0x3F) << 12 |
(ch3 & 0x3F) << 6 |
(ch4 & 0x3F);

cp = cp - 0x10000;

char high = (char) (0xD800 + ((cp >> 10) & 0x3FF));
char low = (char) (0xDC00 + (cp & 0x3FF));
ach[ofch++] = high;
ach[ofch++] = low;

break;
}

default:
throw new UTFDataFormatException(
"illegal leading UTF byte: " + ch);
}
}
}

return new String(ach, 0, ofch);
return fAscii // all characters can be represented by a single byte, use Latin1
? new String(ab, of, cb, StandardCharsets.ISO_8859_1)
: new String(ach, 0, ofch);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2023, Oracle and/or its affiliates.
* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* https://oss.oracle.com/licenses/upl.
Expand All @@ -25,6 +25,10 @@
import data.Person;

import java.lang.reflect.InvocationTargetException;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

import org.junit.Test;

import java.io.ByteArrayInputStream;
Expand Down Expand Up @@ -64,6 +68,37 @@ public class ExternalizableHelperTest extends ExternalizableHelper
{
// ----- unit tests -----------------------------------------------------

/**
 * Verify UTF-8 decoding for strings of increasing encoded width: plain ASCII,
 * Cyrillic, Roman-numeral symbols, and raw 4-byte sequences; then verify that
 * a string built from those 4-byte sequences still round-trips through
 * {@code toBinary}/{@code fromBinary}.
 */
@Test
public void testUtfConversion() throws IOException
{
    // each int packs one complete 4-byte UTF-8 sequence (lead byte 0xF0)
    int[] anFourByteSequences = new int[] {0xf0938080, 0xf09f8ebf, 0xf09f8f80, 0xf09f8e89, 0xf09f9294};

    String[] asSamples = {"Aleksandar", "Александар", "ⅯⅭⅯⅬⅩⅩⅠⅤ"};
    for (String sSample : asSamples)
    {
        assertUtfConversion(sSample);
    }
    assertUtfConversion(toBytes(anFourByteSequences));

    // make sure we can still handle our proprietary (broken) encoding
    byte[] abUtf8 = toBytes(anFourByteSequences);
    String sUtf   = new String(abUtf8, StandardCharsets.UTF_8);
    Binary bin    = ExternalizableHelper.toBinary(sUtf);

    // the serialized form is expected to occupy exactly 32 bytes
    assertEquals(32, bin.length());
    assertEquals(sUtf, ExternalizableHelper.fromBinary(bin));
}

/**
 * Encode the given string as UTF-8 and run the byte-array based conversion
 * check on the resulting bytes.
 *
 * @param s  the string to encode and verify
 *
 * @throws IOException  if the underlying conversion check fails with an I/O error
 */
private void assertUtfConversion(String s) throws IOException
{
    byte[] abEncoded = s.getBytes(StandardCharsets.UTF_8);
    assertUtfConversion(abEncoded);
}

/**
 * Decode the given UTF-8 bytes with both the JDK decoder and
 * {@code ExternalizableHelper.convertUTF}, print a one-line summary of the
 * two results, and assert that they are equal.
 *
 * @param abUtf8  the UTF-8 encoded bytes to decode
 *
 * @throws IOException  if {@code convertUTF} rejects the input
 */
private void assertUtfConversion(byte[] abUtf8) throws IOException
{
    // the JDK's decoding is the reference result
    String sReference = new String(abUtf8, StandardCharsets.UTF_8);

    // scratch buffer sized to the number of UTF-16 chars in the reference
    char[] achScratch = new char[sReference.length()];
    int    cbInput    = abUtf8.length;
    String sDecoded   = ExternalizableHelper.convertUTF(abUtf8, 0, cbInput, achScratch);

    System.out.printf("\n%12s = %-12s : utf8 bytes = %d; string length = %d",
            sReference, sDecoded, cbInput, sDecoded.length());

    assertEquals(sReference, sDecoded);
}

/**
* Test POF serialization/deserialization of a java.util.Map.
*/
Expand Down Expand Up @@ -848,6 +883,16 @@ public void runPerformanceTest()

// ----- helper methods -------------------------------------------------

/**
 * Serialize an array of ints into a byte array, four bytes per int, most
 * significant byte first (the default byte order of a newly allocated
 * {@code ByteBuffer}).
 *
 * @param ai  the ints to serialize
 *
 * @return a new byte array of length {@code 4 * ai.length}
 */
private static byte[] toBytes(int[] ai)
{
    ByteBuffer bufBytes = ByteBuffer.allocate(4 * ai.length);

    // bulk-copy through an int view; the view inherits the buffer's byte order
    bufBytes.asIntBuffer().put(ai);

    return bufBytes.array();
}

private long doTest(int[] aData)
{
long nAccum = 0;
Expand Down

0 comments on commit a71e0b5

Please sign in to comment.