From 0a9e4266a61e8a6a2d7cf2086054608405d6144b Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 2 Jul 2024 14:47:59 -0700 Subject: [PATCH 1/2] Adds support for parsing binary argument groups. --- .../com/amazon/ion/impl/IonCursorBinary.java | 414 +++++++++++++++++- .../amazon/ion/impl/IonCursorBinaryTest.java | 353 +++++++++++++++ 2 files changed, 755 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 58ef574e9..757477665 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -176,6 +176,32 @@ private static class RefillableState { } + /** + * Marks an argument group. + */ + private static class ArgumentGroupMarker { + + /** + * Marks the start index of the current page in the argument group. + */ + long pageStartIndex = -1; + + /** + * Marks the end index of the current page in the argument group. If -1, this indicates that the argument + * group is delimited and the end of the page has not yet been found. + */ + long pageEndIndex = -1; + + /** + * For tagless groups, the primitive type of the tagless values in the group; otherwise, null. When null, + * there is always a single page of values in the group, and the end is reached either when an end delimiter + * is found (for delimited groups), or when the cursor's `peekIndex` reaches `pageEndIndex`. When non-null, + * there may be multiple pages of tagless values in the group; whenever the cursor reaches `pageEndIndex`, it + * must read a FlexUInt at that position to calculate the end index of the next page. + */ + PrimitiveType primitiveType = null; + } + /** * Dummy state that indicates the cursor has been terminated and that additional API calls will have no effect. 
*/ @@ -196,6 +222,9 @@ private static class RefillableState { */ Marker parent = null; + ArgumentGroupMarker[] argumentGroupStack = new ArgumentGroupMarker[CONTAINER_STACK_INITIAL_CAPACITY]; + int argumentGroupIndex = -1; + /** * The start offset into the user-provided byte array, or 0 if the user provided an InputStream. */ @@ -388,6 +417,10 @@ private static BufferConfiguration.DataHandler getDataHandler(IonBufferConfigura containerStack[i] = new Marker(-1, -1); } + for (int i = 0; i < CONTAINER_STACK_INITIAL_CAPACITY; i++) { + argumentGroupStack[i] = new ArgumentGroupMarker(); + } + this.buffer = buffer; this.startOffset = offset; this.offset = offset; @@ -520,6 +553,10 @@ private static IonBufferConfiguration getFixedSizeConfigurationFor( containerStack[i] = new Marker(-1, -1); } + for (int i = 0; i < CONTAINER_STACK_INITIAL_CAPACITY; i++) { + argumentGroupStack[i] = new ArgumentGroupMarker(); + } + this.buffer = new byte[configuration.getInitialBufferSize()]; this.startOffset = 0; this.offset = 0; @@ -1824,7 +1861,9 @@ private boolean slowIsDelimitedEnd_1_1() { * Skips past the remaining elements of the current delimited container. * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. */ - boolean skipRemainingDelimitedContainerElements_1_1() { + boolean uncheckedSkipRemainingDelimitedContainerElements_1_1() { + // TODO this needs to be updated ot handle the case where the container contains non-prefixed macro invocations, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. while (event != Event.END_CONTAINER) { event = Event.NEEDS_DATA; while (uncheckedNextToken()); @@ -1841,6 +1880,8 @@ boolean skipRemainingDelimitedContainerElements_1_1() { * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. 
*/ private boolean slowSkipRemainingDelimitedContainerElements_1_1() { + // TODO this needs to be updated to handle the case where the container contains non-prefixed macro invocations, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. while (event != Event.END_CONTAINER) { slowNextToken(); if (event == Event.START_CONTAINER && valueMarker.endIndex == DELIMITED_MARKER) { @@ -2523,6 +2564,30 @@ private void pushContainer() { parent = containerStack[containerIndex]; } + + /** + * Doubles the size of the cursor's argument group stack. + */ + private void growArgumentGroupStack() { + ArgumentGroupMarker[] newStack = new ArgumentGroupMarker[argumentGroupStack.length * 2]; + System.arraycopy(argumentGroupStack, 0, newStack, 0, argumentGroupStack.length); + for (int i = argumentGroupStack.length; i < newStack.length; i++) { + newStack[i] = new ArgumentGroupMarker(); + } + argumentGroupStack = newStack; + } + + /** + * Push a Marker representing the current argument group onto the stack. + * @return the marker at the new top of the stack. + */ + private ArgumentGroupMarker pushArgumentGroup() { + if (++argumentGroupIndex >= argumentGroupStack.length) { + growArgumentGroupStack(); + } + return argumentGroupStack[argumentGroupIndex]; + } + /** * Step into the current container. */ @@ -2608,7 +2673,7 @@ public Event stepOutOfContainer() { } // Seek past the remaining bytes at this depth and pop from the stack. if (parent.endIndex == DELIMITED_MARKER) { - if (skipRemainingDelimitedContainerElements_1_1()) { + if (uncheckedSkipRemainingDelimitedContainerElements_1_1()) { return event; } } else { @@ -2840,6 +2905,8 @@ private void slowNextToken() { * @return true if not enough data was available in the stream; otherwise, false. 
*/ private boolean slowSkipRemainingValueBytes() { + // TODO this needs to be updated to handle the case where the value is a non-prefixed macro invocation, + // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. if (valueMarker.endIndex == DELIMITED_MARKER && valueTid != null && valueTid.isDelimited) { seekPastDelimitedContainer_1_1(); if (event == Event.NEEDS_DATA) { @@ -3035,20 +3102,14 @@ private long calculateTaglessLengthAndType(PrimitiveType primitiveType) { } /** - * Advances the cursor to the next value, assuming that it is tagless with the given type, skipping the current - * value (if any). This method may return: - * - * @param primitiveType the {@link PrimitiveType} of the tagless value on which to position the cursor. - * @return an Event conveying the result of the operation. + * Skips any bytes remaining in the current token, positioning the cursor on the first byte of the next token. + * @return true if not enough data was available in the stream to skip the previous value; otherwise, false. */ - public Event nextTaglessValue(PrimitiveType primitiveType) { + private boolean skipToNextToken() { event = Event.NEEDS_DATA; if (isSlowMode) { if (slowSkipToNextToken()) { - return event; + return true; } } else { if (peekIndex < valueMarker.endIndex) { @@ -3061,6 +3122,23 @@ public Event nextTaglessValue(PrimitiveType primitiveType) { reportConsumedData(); } reset(); + return false; + } + + /** + * Advances the cursor to the next value, assuming that it is tagless with the given type, skipping the current + * value (if any). This method may return: + * + * @param primitiveType the {@link PrimitiveType} of the tagless value on which to position the cursor. + * @return an Event conveying the result of the operation. 
+ */ + public Event nextTaglessValue(PrimitiveType primitiveType) { + if (skipToNextToken()) { + return event; + } taglessType = primitiveType; valueTid = primitiveType.typeID; valueMarker.typeId = valueTid; @@ -3104,6 +3182,317 @@ public Event fillArgumentEncodingBitmap(int numberOfBytes) { return event; } + /** + * Reads the group continuation FlexUInt on which the cursor is currently positioned. + * @return the value of the continuation, or -1 if the end of the stream was reached. + */ + private long readGroupContinuation() { + long groupContinuation; + if (isSlowMode) { + groupContinuation = slowReadFlexUInt_1_1(); + if (groupContinuation < 0) { + return -1; + } + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + } else { + groupContinuation = uncheckedReadFlexUInt_1_1(); + } + return groupContinuation; + } + + /** + * Positions the cursor after the previous token, then enters the tagged argument group that occurs at that + * position. It is up to the caller to ensure that a group actually exists at that location. This method may return: + * + * @return an Event conveying the result of the operation. + */ + public Event enterTaggedArgumentGroup() { + if (skipToNextToken()) { + return event; + } + long groupContinuation = readGroupContinuation(); + if (groupContinuation < 0) { + return event; + } + ArgumentGroupMarker group = pushArgumentGroup(); + group.pageStartIndex = peekIndex; + if (groupContinuation == 0) { + // Delimited argument group. + group.pageEndIndex = -1; + } else { + group.pageEndIndex = peekIndex + groupContinuation; + } + group.primitiveType = null; + valueMarker.endIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Positions the cursor after the previous token, then enters the tagless argument group that occurs at that + * position. It is up to the caller to ensure that a group actually exists at that location. 
This method may return: + * + * @param primitiveType the primitive type of the values in the group. + * @return an Event conveying the result of the operation. + */ + public Event enterTaglessArgumentGroup(PrimitiveType primitiveType) { + if (skipToNextToken()) { + return event; + } + long indexBeforeFirstContinuation = peekIndex; + long groupContinuation = readGroupContinuation(); + if (groupContinuation < 0) { + return event; + } + if (groupContinuation == 0) { + // This is an empty group. Rather than storing extra state to track this rare special case, simply + // rewind and cause the continuation to be read again during nextGroupedValue(). + peekIndex = indexBeforeFirstContinuation; + } + ArgumentGroupMarker group = pushArgumentGroup();; + group.pageStartIndex = peekIndex; + group.pageEndIndex = peekIndex + groupContinuation; + group.primitiveType = primitiveType; + valueMarker.endIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Attempts to fill the current page of the current argument group. This should only be called when it has been + * determined that the page is not already buffered in its entirety. + * @param group the group containing the page to fill. + * @return true if not enough data was available to fill the page; otherwise, false. + * @throws IonException if the cursor is not in 'slow' mode, indicating unexpected EOF. + */ + private boolean fillArgumentGroupPage(ArgumentGroupMarker group) { + if (isSlowMode) { + // Fill the entire page. + if (!fillAt(group.pageStartIndex, group.pageEndIndex - group.pageStartIndex)) { + event = Event.NEEDS_DATA; + return true; + } + // TODO performance: exit slow mode until the page is finished. + } else { + throw new IonException("Unexpected EOF: argument group extended beyond the end of the buffer."); + } + return false; + } + + /** + * Positions the cursor on the next value in the tagged group. 
Upon return, the value will be filled and + * `valueMarker` set to the value's start and end indices. + * @param group the group to which the value belongs. + * @return an Event conveying the result of the operation. + */ + private Event nextGroupedTaggedValue(ArgumentGroupMarker group) { + boolean isUserValue; // if false, the header represents no-op padding + if (group.pageEndIndex < 0) { + // Delimited. + int b; + if (isSlowMode) { + b = slowReadByte(); + if (b < 0) { + event = Event.NEEDS_DATA; + return event; + } + if (b == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + group.pageEndIndex = peekIndex; + setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + event = Event.NEEDS_INSTRUCTION; + return event; + } + isUserValue = slowReadHeader(b, false, valueMarker); + } else { + b = buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK; + if (b == (OpCodes.DELIMITED_END_MARKER & SINGLE_BYTE_MASK)) { + group.pageEndIndex = peekIndex; + event = Event.NEEDS_INSTRUCTION; + return event; + } + isUserValue = uncheckedReadHeader(b, false, valueMarker); + } + } else { + if (peekIndex == group.pageEndIndex) { + // End of the group + event = Event.NEEDS_INSTRUCTION; + return event; + } + if (group.pageEndIndex > limit && fillArgumentGroupPage(group)) { + return event; + } + isUserValue = uncheckedReadHeader(buffer[(int)(peekIndex++)] & SINGLE_BYTE_MASK, false, valueMarker); + } + valueTid = valueMarker.typeId; + if (!isUserValue) { + throw new IonException("No-op padding is not currently supported in argument groups."); + } + return event; + } + + /** + * Positions the cursor on the next value in the tagless group. Upon return, the value will be filled and + * `valueMarker` set to the value's start and end indices. + * @param group the group to which the value belongs. + * @return an Event conveying the result of the operation. + */ + private Event nextGroupedTaglessValue(ArgumentGroupMarker group) { + if (peekIndex == group.pageEndIndex) { + // End of the page. 
+ long continuation = readGroupContinuation(); + if (continuation == 0) { + // End of the group + event = Event.NEEDS_INSTRUCTION; + return event; + } + group.pageEndIndex = peekIndex + continuation; + } + if (group.pageEndIndex > limit && fillArgumentGroupPage(group)) { + return event; + } + // TODO performance: for fixed-width tagless types, the following could be skipped after the first value. + nextTaglessValue(group.primitiveType); + return event; + } + + /** + * Positions the cursor on the next value in the group. Upon return, the value will be filled and `valueMarker` set + * to the value's start and end indices. This method may return: + * + * @return an Event conveying the result of the operation. + */ + public Event nextGroupedValue() { + ArgumentGroupMarker group = argumentGroupStack[argumentGroupIndex]; + if (peekIndex < valueMarker.endIndex) { + peekIndex = valueMarker.endIndex; + } + if (group.primitiveType == null) { + return nextGroupedTaggedValue(group); + } + return nextGroupedTaglessValue(group); + } + + /** + * Seeks the cursor to the end of the current page of the argument group. + * @param group the group in which to seek. + * @return true if there was not enough data to complete the seek; otherwise, false. + */ + private boolean seekToEndOfArgumentGroupPage(ArgumentGroupMarker group) { + if (isSlowMode) { + if (slowSeek(group.pageEndIndex - offset)) { + return true; + } + peekIndex = offset; + } else { + peekIndex = group.pageEndIndex; + } + return false; + } + + // Dummy delimited container to be used when seeking forward to a delimited end marker of a synthetic container, + // like an argument group. + private static final IonTypeID DUMMY_DELIMITED_CONTAINER = TYPE_IDS_1_1[OpCodes.DELIMITED_SEXP & SINGLE_BYTE_MASK]; + + /** + * Seeks to the end of the current delimited argument group. + * @return true if not enough data was available to complete the seek; otherwise, false. 
+ */ + private boolean seekToEndOfDelimitedArgumentGroup() { + // Push a dummy delimited container onto the stack, preparing the cursor to seek forward to the delimited end + // marker applicable at the current depth. + pushContainer(); + parent.endIndex = -1; + parent.typeId = DUMMY_DELIMITED_CONTAINER; + boolean isEof; + if (isSlowMode) { + isEof = slowSkipRemainingDelimitedContainerElements_1_1(); + } else { + isEof = uncheckedSkipRemainingDelimitedContainerElements_1_1(); + } + // Pop the dummy delimited container from the stack. + if (--containerIndex >= 0) { + parent = containerStack[containerIndex]; + } else { + parent = null; + containerIndex = -1; + } + return isEof; + } + + /** + * Exits the cursor's current tagged argument group. + * @param group the group to exit. + * @return an Event conveying the result of the operation (either NEEDS_DATA or NEEDS_INSTRUCTION). + */ + private Event exitTaggedArgumentGroup(ArgumentGroupMarker group) { + if (group.pageEndIndex < 0) { + if (seekToEndOfDelimitedArgumentGroup()) { + return event; + } + } else if (seekToEndOfArgumentGroupPage(group)) { + return event; + } + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Exits the cursor's current tagless argument group. + * @param group the group to exit. + * @return an Event conveying the result of the operation (either NEEDS_DATA or NEEDS_INSTRUCTION). + */ + private Event exitTaglessArgumentGroup(ArgumentGroupMarker group) { + long continuation = -1; + while (continuation != 0) { + if (seekToEndOfArgumentGroupPage(group)) { + return event; + } + continuation = readGroupContinuation(); + if (continuation < 0) { + return event; + } + group.pageEndIndex = peekIndex + continuation; + } + event = Event.NEEDS_INSTRUCTION; + return event; + } + + /** + * Exits the cursor's current argument group. This method may return: + * + * @return an Event conveying the result of the operation. 
+ */ + public Event exitArgumentGroup() { + ArgumentGroupMarker group = argumentGroupStack[argumentGroupIndex]; + if (group.pageEndIndex >= 0 && peekIndex >= group.pageEndIndex) { + event = Event.NEEDS_INSTRUCTION; + return event; + } + event = Event.NEEDS_DATA; + if (group.primitiveType == null) { + return exitTaggedArgumentGroup(group); + } + return exitTaglessArgumentGroup(group); + } + @Override public Event fillValue() { event = Event.VALUE_READY; @@ -3282,6 +3671,7 @@ public void close() { } buffer = null; containerStack = null; + argumentGroupStack = null; byteBuffer = null; terminate(); } diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index 129d1c13f..640b0ba80 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -1372,4 +1372,357 @@ public void macroInvocationWithIdInOpcodeAndMultiByteAEB(boolean constructFromBy public void macroInvocationWithIdInOpcodeAndMultiByteAEBIncremental() throws Exception { assertAEBThenIntZeroIncremental(macroWithThreeByteAEBThenIntZero(), 3); } + + + private static ExpectationProvider enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType type) { + return consumer -> consumer.accept(new Expectation<>( + String.format("enter tagless %s group", type.name()), + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.enterTaglessArgumentGroup(type)) + )); + } + + private static ExpectationProvider enterTaggedArgumentGroup() { + return consumer -> consumer.accept(new Expectation<>( + "enter tagged group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.enterTaggedArgumentGroup()) + )); + } + + private static ExpectationProvider nextGroupedValue(IonType expectedType, int expectedStartIndex, int expectedEndIndex) { + return consumer -> consumer.accept(new Expectation<>( + String.format("grouped value %s[%d, %d]", expectedType, expectedStartIndex, expectedEndIndex), + 
cursor -> { + assertEquals(IonType.isContainer(expectedType) ? START_CONTAINER : START_SCALAR, cursor.nextGroupedValue()); + assertValueMarker(cursor, expectedType, expectedStartIndex, expectedEndIndex); + } + )); + } + + private static ExpectationProvider endOfGroup() { + return consumer -> consumer.accept(new Expectation<>( + "end of group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.nextGroupedValue()) + )); + } + + private static ExpectationProvider exitArgumentGroup() { + return consumer -> consumer.accept(new Expectation<>( + "exit group", + cursor -> assertEquals(NEEDS_INSTRUCTION, cursor.exitArgumentGroup()) + )); + } + + private static byte[] taglessArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0A | int 10 \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0B | int 11 \n" + + "01 | FlexUInt 0 - end of argument group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaglessArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taglessArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), + nextGroupedValue(IonType.INT, 7, 8), + nextGroupedValue(IonType.INT, 9, 10), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] taggedPrefixedArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "0B | FlexUInt 1 - group 
length 5 bytes \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaggedPrefixedArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taggedPrefixedArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] taggedDelimitedArgumentGroup() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00000010 -- one grouped argument \n" + + "01 | FlexUInt 0 - delimited group \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + + "F0 | End of delimited group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void fullyTraverseTaggedDelimitedArgumentGroup(boolean constructFromBytes) throws Exception { + byte[] data = taggedDelimitedArgumentGroup(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + 
stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] emptyArgumentGroups() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00001010 -- two grouped arguments \n" + + // First group: interpreted as tagged + "01 | FlexUInt 0 - delimited group \n" + + "F0 | End of delimited group \n" + + // Second group: interpreted as tagless + "01 | FlexUInt 0 - end of argument group \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void emptyArgumentGroups(boolean constructFromBytes) throws Exception { + byte[] data = emptyArgumentGroups(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + endOfGroup(), + exitArgumentGroup(), + enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), + endOfGroup(), + exitArgumentGroup(), + endStream() + ); + } + } + + private static byte[] twoArgumentGroupsFollowedBySingleValue() throws Exception { + return withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "05 | AEB 0b00001010 -- two grouped arguments \n" + + // First group: interpreted as tagged + "01 | FlexUInt 0 - delimited group \n" + + "60 | int 0 \n" + + "B3 | List length 3 \n" + + "91 | String length 1 \n" + + "61 | 'a' \n" + + "6A | Float 0 \n" + + "F0 | End of delimited group \n" + + // Second group: interpreted as tagless + "03 | FlexUInt 1 - page length 1 byte \n" + + "0A | int 10 \n" + + "03 | FlexUInt 1 - page length 1 byte \n" + + "0B | int 11 \n" + + "01 | FlexUInt 0 - end 
of argument group \n" + + "B1 | List length 1 \n" + + "60 | int 0 \n" + ))); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void twoArgumentGroupsFollowedBySingleValue(boolean constructFromBytes) throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + nextGroupedValue(IonType.LIST, 9, 12), + stepInToContainer(IonType.LIST, 9, 12), + nextTaggedValue(IonType.STRING, 10, 11), + nextTaggedValue(IonType.FLOAT, 12, 12), + stepOutOfContainer(), + endOfGroup(), + exitArgumentGroup(), + enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), + nextGroupedValue(IonType.INT, 14, 15), + nextGroupedValue(IonType.INT, 16, 17), + endOfGroup(), + exitArgumentGroup(), + container( + scalar(), valueMarker(IonType.INT, 20, 20) + ), + endStream() + ); + } + } + + @Test + public void twoArgumentGroupsFollowedBySingleValueIncremental() throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), + instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.LIST, 9, 12); + return cursor.stepIntoContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.STRING, 10, 11)), + instruction( + cursor -> { + if 
(cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + // Note: the value byte of the string is skipped, not buffered. + assertValueMarker(cursor, IonType.FLOAT, 11, 11); + return cursor.stepOutOfContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.exitArgumentGroup(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction(cursor -> cursor.enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 13, 14)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 15, 16)), + instruction( + cursor -> { + if (cursor.nextGroupedValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.exitArgumentGroup(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.LIST, 18, 19); + return cursor.stepIntoContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction( + cursor -> { + if (cursor.nextValue() == NEEDS_DATA) { + return NEEDS_DATA; + } + assertValueMarker(cursor, IonType.INT, 19, 19); + return cursor.stepOutOfContainer(); + }, + event(NEEDS_INSTRUCTION) + ), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void skipOverArgumentGroups(boolean constructFromBytes) throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertSequence( + cursor, + nextMacroInvocation(0x13), valueMarker(null, 5, -1), + fillArgumentEncodingBitmap(1, 5, 6), + enterTaggedArgumentGroup(), + nextGroupedValue(IonType.INT, 8, 8), + exitArgumentGroup(), // Early exit + enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), + exitArgumentGroup(), // Skip over group + container( + scalar(), valueMarker(IonType.INT, 20, 20) + ), + endStream() + ); + } + } + + @Test + public void skipOverArgumentGroupsIncremental() throws Exception { + byte[] data = twoArgumentGroupsFollowedBySingleValue(); + List instructions = Arrays.asList( + instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), + instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), + // Skip the list argument + instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(cursor -> cursor.enterTaglessArgumentGroup(IonCursorBinary.PrimitiveType.UINT8), event(NEEDS_INSTRUCTION)), + // Skip all arguments in the group + instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.LIST, 16, 17)), + // This is the end of the stream, so the response is not used. 
+ instruction(IonCursorBinary::nextValue, null) + ); + executeIncrementally(data, instructions); + } + + // TODO Nest argument groups >8 deep, exercising argument group stack growth. + // TODO Add more incremental tests for various argument group combinations, improving coverage of NEEDS_DATA cases. + // TODO Extend a tagged prefixed argument group page beyond the current buffer limit. In slow mode, this should + // cause the whole page to be filled. In unchecked mode, this should be an error for unexpected EOF. } From d15d21a658bfc5be2a7c075c634ce36939360838 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 16 Jul 2024 13:05:07 -0700 Subject: [PATCH 2/2] Update src/main/java/com/amazon/ion/impl/IonCursorBinary.java Co-authored-by: Matthew Pope <81593196+popematt@users.noreply.github.com> --- src/main/java/com/amazon/ion/impl/IonCursorBinary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 757477665..7030b23cc 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -1862,7 +1862,7 @@ private boolean slowIsDelimitedEnd_1_1() { * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. */ boolean uncheckedSkipRemainingDelimitedContainerElements_1_1() { - // TODO this needs to be updated ot handle the case where the container contains non-prefixed macro invocations, + // TODO this needs to be updated to handle the case where the container contains non-prefixed macro invocations, // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. while (event != Event.END_CONTAINER) { event = Event.NEEDS_DATA;