From 787fb3e79a91862594daa282bf4b90e1b5c79ef9 Mon Sep 17 00:00:00 2001 From: Tyler Gregg Date: Tue, 24 Dec 2024 14:53:40 -0800 Subject: [PATCH] Fixes various implementation gaps and bugs related to skipping and filling binary macro invocations. --- .../com/amazon/ion/impl/IonCursorBinary.java | 481 ++++++++++++++---- ...IonReaderContinuableApplicationBinary.java | 5 + .../impl/IonReaderContinuableCoreBinary.java | 38 +- .../ion/impl/macro/EExpressionArgsReader.java | 4 +- .../EncodingDirectiveCompilationTest.java | 176 +++++++ .../amazon/ion/impl/IonCursorBinaryTest.java | 162 +++++- ...onReaderContinuableTopLevelBinaryTest.java | 340 +++++++++++++ 7 files changed, 1079 insertions(+), 127 deletions(-) diff --git a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java index 026299795..6a67dc5a1 100644 --- a/src/main/java/com/amazon/ion/impl/IonCursorBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonCursorBinary.java @@ -11,6 +11,10 @@ import com.amazon.ion.SystemSymbols; import com.amazon.ion.impl.bin.FlexInt; import com.amazon.ion.impl.bin.OpCodes; +import com.amazon.ion.impl.bin.PresenceBitmap; +import com.amazon.ion.impl.macro.EncodingContext; +import com.amazon.ion.impl.macro.Macro; +import com.amazon.ion.impl.macro.MacroRef; import java.io.ByteArrayInputStream; import java.io.EOFException; @@ -18,6 +22,7 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.List; import static com.amazon.ion.impl.IonTypeID.DELIMITED_END_ID; import static com.amazon.ion.impl.IonTypeID.ONE_ANNOTATION_FLEX_SYM_LOWER_NIBBLE_1_1; @@ -54,7 +59,7 @@ class IonCursorBinary implements IonCursor { private static final int CONTAINER_STACK_INITIAL_CAPACITY = 8; // When set as an 'endIndex', indicates that the value is delimited. - private static final int DELIMITED_MARKER = -1; + static final int DELIMITED_MARKER = -1; /** * The kind of location at which `checkpoint` points. @@ -769,6 +774,11 @@ private void shiftContainerEnds(long shiftAmount) { containerStack[i].endIndex -= shiftAmount; } } + for (int i = argumentGroupIndex; i >= 0; i--) { + if (argumentGroupStack[i].pageEndIndex > 0) { + argumentGroupStack[i].pageEndIndex -= shiftAmount; + } + } } /** @@ -781,7 +791,9 @@ private void shiftIndicesLeft(int shiftAmount) { peekIndex = Math.max(peekIndex - shiftAmount, 0); valuePreHeaderIndex -= shiftAmount; valueMarker.startIndex -= shiftAmount; - valueMarker.endIndex -= shiftAmount; + if (valueMarker.endIndex != DELIMITED_MARKER) { + valueMarker.endIndex -= shiftAmount; + } checkpoint -= shiftAmount; if (fieldTextMarker.startIndex > -1) { fieldTextMarker.startIndex -= shiftAmount; @@ -842,6 +854,10 @@ private long refill(long minimumNumberOfBytesRequired) { * @return true if not enough data was available in the stream; otherwise, false. */ private boolean slowSeek(long numberOfBytes) { + if (refillableState.pinOffset > -1) { + // If data is being pinned, fill the buffer instead of potentially skipping directly from the source. + return !fillAt(offset, numberOfBytes); + } long size = availableAt(offset); long unbufferedBytesToSkip = numberOfBytes - size; if (unbufferedBytesToSkip <= 0) { @@ -1887,13 +1903,80 @@ private boolean slowIsDelimitedEnd_1_1() { return false; } + /** + * Loads the presence bitmap for the given signature. Upon calling this method, `peekIndex` must point to the + * first byte in the argument encoding bitmap (if applicable for the signature). + * @param signature the signature of the macro invocation for which to read a presence bitmap. + * @return the presence bitmap. + */ + protected PresenceBitmap loadPresenceBitmap(List signature) { + // TODO performance: reuse or pool the presence bitmaps. Ideally, it would not be necessary to allocate them + // at all, but especially not when an invocation does not even have an argument encoding bitmap. + PresenceBitmap presenceBitmap = new PresenceBitmap(); + presenceBitmap.initialize(signature); + if (presenceBitmap.getByteSize() > 0) { + if (fillArgumentEncodingBitmap(presenceBitmap.getByteSize()) == IonCursor.Event.NEEDS_DATA) { + throw new UnsupportedOperationException("TODO: support continuable parsing of AEBs."); + } + } + // Note: when there is no AEB, the following initializes the presence bitmap to "EXPRESSION" for each parameter. + presenceBitmap.readFrom(buffer, (int) valueMarker.startIndex); + presenceBitmap.validate(); + return presenceBitmap; + } + + /** + * @return the {@link EncodingContext} currently active, or {@code null}. + */ + EncodingContext getEncodingContext() { + throw new IonException("There is no encoding context at the IonCursor level."); + } + + /** + * Skips past the current macro parameter. + * @param parameter the parameter to skip. + */ + private void uncheckedSkipMacroParameter(Macro.Parameter parameter) { + switch (parameter.getType()) { + case Tagged: + uncheckedNextToken(); + break; + case Int8: + case Uint8: + peekIndex += 1; + break; + case Int16: + case Uint16: + case Float16: + peekIndex += 2; + break; + case Int32: + case Uint32: + case Float32: + peekIndex += 4; + break; + case Int64: + case Uint64: + case Float64: + peekIndex += 8; + break; + case FlexUint: + uncheckedReadFlexUInt_1_1(); + break; + case FlexInt: + uncheckedReadFlexInt_1_1(); + break; + case FlexSym: + uncheckedReadFlexSym_1_1(valueMarker); + break; + } + } + /** * Skips past the remaining elements of the current delimited container. * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. */ boolean uncheckedSkipRemainingDelimitedContainerElements_1_1() { - // TODO this needs to be updated to handle the case where the container contains non-prefixed macro invocations, - // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. while (event != Event.END_CONTAINER) { event = Event.NEEDS_DATA; while (uncheckedNextToken()); @@ -1904,19 +1987,124 @@ boolean uncheckedSkipRemainingDelimitedContainerElements_1_1() { return false; } + /** + * Skips past the current macro parameter, ensuring enough bytes are available in the stream. + * @param parameter the parameter to skip. + * @return true if the end of the stream was reached before skipping past the parameter; otherwise, false. + */ + private boolean slowSkipMacroParameter(Macro.Parameter parameter) { + switch (parameter.getType()) { + case Tagged: + slowNextToken(); + if (event == Event.NEEDS_DATA) { + return true; + } + break; + case Int8: + case Uint8: + if (!fillAt(peekIndex, 1)) { + return true; + } + peekIndex += 1; + break; + case Int16: + case Uint16: + case Float16: + if (!fillAt(peekIndex, 2)) { + return true; + } + peekIndex += 2; + break; + case Int32: + case Uint32: + case Float32: + if (!fillAt(peekIndex, 4)) { + return true; + } + peekIndex += 4; + break; + case Int64: + case Uint64: + case Float64: + if (!fillAt(peekIndex, 8)) { + return true; + } + peekIndex += 8; + break; + case FlexUint: + if (slowReadFlexUInt_1_1() < 0) { + return true; + } + break; + case FlexInt: + if (slowReadFlexInt_1_1(valueMarker)) { + return true; + } + break; + case FlexSym: + if (FlexSymType.INCOMPLETE == slowSkipFlexSym_1_1(valueMarker)) { + return true; + } + break; + } + return false; + } + + /** + * Skips past the current macro invocation, ensuring enough bytes are available in the stream. + * @return true if the end of the stream was reached before skipping past the invocation; otherwise, false. + */ + private boolean skipMacroInvocation() { + EncodingContext context = isSystemInvocation ? EncodingContext.getDefault() : getEncodingContext(); + Macro macro = context.getMacroTable().get(MacroRef.byId((int) macroInvocationId)); + if (macro == null) { + throw new IonException(String.format("Cannot skip over unknown macro with ID %d.", macroInvocationId)); + } + stepIntoEExpression(); + List signature = macro.getSignature(); + PresenceBitmap presenceBitmap = loadPresenceBitmap(signature); + for (int i = 0; i < signature.size(); i++) { + Macro.Parameter parameter = signature.get(i); + long parameterPresence = presenceBitmap.get(i); + if (parameterPresence == PresenceBitmap.VOID) { + continue; + } + if (parameterPresence == PresenceBitmap.GROUP) { + Macro.ParameterEncoding encoding = parameter.getType(); + if (encoding == Macro.ParameterEncoding.Tagged) { + enterTaggedArgumentGroup(); + } else { + enterTaglessArgumentGroup(encoding.taglessEncodingKind); + } + if (event == Event.NEEDS_DATA) { + return true; + } + if (exitArgumentGroup() == Event.NEEDS_DATA) { + return true; + } + } else { + // A single expression + if (isSlowMode) { + if (slowSkipMacroParameter(parameter)) { + return true; + } + } else { + uncheckedSkipMacroParameter(parameter); + } + } + } + stepOutOfEExpression(); + return false; + } + /** * Skips past the remaining elements of the current delimited container, ensuring enough bytes are available in * the stream. * @return true if the end of the stream was reached before skipping past all remaining elements; otherwise, false. */ private boolean slowSkipRemainingDelimitedContainerElements_1_1() { - // TODO this needs to be updated ot handle the case where the container contains non-prefixed macro invocations, - // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. while (event != Event.END_CONTAINER) { slowNextToken(); - if (event == Event.START_CONTAINER && valueMarker.endIndex == DELIMITED_MARKER) { - seekPastDelimitedContainer_1_1(); - } if (event == Event.NEEDS_DATA) { return true; } @@ -2127,18 +2315,25 @@ private void setMarker(long endIndex, Marker markerToSet) { * @return true if the end of the current container has been reached; otherwise, false. */ private boolean checkContainerEnd() { - if (peekIndex < valueMarker.endIndex || parent.endIndex > peekIndex) { + if ( + // The end of the current value hasn't been reached. + peekIndex < valueMarker.endIndex || + // The end of the parent container hasn't been reached. + parent.endIndex > peekIndex || + // A container was just encountered, but not yet stepped in. + checkpointLocation == CheckpointLocation.AFTER_CONTAINER_HEADER + ) { return false; } - if (parent.endIndex == DELIMITED_MARKER) { - return isSlowMode ? slowIsDelimitedEnd_1_1() : uncheckedIsDelimitedEnd_1_1(); - } if (parent.endIndex == peekIndex) { event = Event.END_CONTAINER; valueTid = null; fieldSid = -1; return true; } + if (parent.endIndex == DELIMITED_MARKER || parent.typeId.isDelimited) { + return isSlowMode ? slowIsDelimitedEnd_1_1() : uncheckedIsDelimitedEnd_1_1(); + } throw new IonException("Contained values overflowed the parent container length."); } @@ -2248,6 +2443,22 @@ private void validateAnnotationWrapperEndIndex(long endIndex) { } } + /** + * Validates that the value on which the cursor is positioned is at the expected location. This may be useful after + * traversing a length-prefixed macro invocation's arguments in order to verify that the resulting cursor location + * matches the invocation's declared end index. + * @param expectedIndex the expected location. If negative (indicating a delimited value), no validation + * is performed. + */ + void validateValueEndIndex(long expectedIndex) { + // Note: the maximum of peekIndex and valueMarker.endIndex is used because 1) peekIndex may be less than + // valueMarker.endIndex if the cursor is positioned on a scalar value that has not been consumed, and 2) + // valueMarker.endIndex may not have been set if it represented a delimited value. + if (expectedIndex >= 0 && Math.max(peekIndex, valueMarker.endIndex) != expectedIndex) { + throw new IonException("Value length did not match the length prefix."); + } + } + /* * The given Marker's endIndex is set to the system symbol ID value and its startIndex is set to -1 * @param markerToSet the marker to set. @@ -2635,7 +2846,7 @@ private void uncheckedStepIntoContainer() { // Push the remaining length onto the stack, seek past the container's header, and increase the depth. pushContainer(); parent.typeId = valueTid; - parent.endIndex = valueTid.isDelimited ? DELIMITED_MARKER : valueMarker.endIndex; + parent.endIndex = valueMarker.endIndex; valueTid = null; event = Event.NEEDS_INSTRUCTION; reset(); @@ -2662,7 +2873,7 @@ private Event slowStepIntoContainer() { isSlowMode = false; } parent.typeId = valueMarker.typeId; - parent.endIndex = valueTid.isDelimited ? DELIMITED_MARKER : valueMarker.endIndex; + parent.endIndex = valueMarker.endIndex; setCheckpointBeforeUnannotatedTypeId(); valueTid = null; hasAnnotations = false; @@ -2670,17 +2881,27 @@ private Event slowStepIntoContainer() { return event; } + /** + * Exits slow mode if possible, allowing the current container to be read without repetitive buffer boundary checks. + * This method should only be called when the cursor is in slow mode. + * @return true if the cursor remains in slow mode; false if it exited slow mode. + */ + private boolean checkAndSetContainerMode() { + if (containerIndex != refillableState.fillDepth - 1) { + if (valueMarker.endIndex > DELIMITED_MARKER && valueMarker.endIndex <= limit) { + refillableState.fillDepth = containerIndex + 1; + } else { + return true; + } + } + isSlowMode = false; + return false; + } + @Override public Event stepIntoContainer() { - if (isSlowMode) { - if (containerIndex != refillableState.fillDepth - 1) { - if (valueMarker.endIndex > DELIMITED_MARKER && valueMarker.endIndex <= limit) { - refillableState.fillDepth = containerIndex + 1; - } else { - return slowStepIntoContainer(); - } - } - isSlowMode = false; + if (isSlowMode && checkAndSetContainerMode()) { + return slowStepIntoContainer(); } uncheckedStepIntoContainer(); return event; @@ -2688,45 +2909,108 @@ public Event stepIntoContainer() { /** * Steps into an e-expression, treating it as a logical container. + * @return `event`, which conveys the result. */ - void stepIntoEExpression() { + Event stepIntoEExpression() { if (valueTid == null || !valueTid.isMacroInvocation) { throw new IonException("Must be positioned on an e-expression."); } + if (isSlowMode && checkAndSetContainerMode()) { + if (refillableState.state != State.READY && !slowMakeBufferReady()) { + return event; + } + } pushContainer(); parent.typeId = valueTid; - // TODO support length prefixed e-expressions. - // TODO when the length is known to be within the buffer, exit slow mode. - parent.endIndex = DELIMITED_MARKER; + // Note: valueMarker.endIndex will be DELIMITED_MARKER for regular e-expressions, and a positive value for + // length-prefixed e-expressions. + parent.endIndex = valueMarker.endIndex; valueTid = null; + setCheckpointBeforeUnannotatedTypeId(); event = Event.NEEDS_INSTRUCTION; - reset(); + return event; + } + + /** + * Seeks to the end of the value on which the cursor is positioned (if any), or to the end of the e-expression + * (if known). Note: it is up to the caller to ensure that the e-expression is at its end when this method is + * called. + * @return true if not enough data was available in the stream to seek to the target location; otherwise, false. + */ + private boolean slowSeekToEndOfEExpression() { + if (refillableState.state != State.READY && !slowMakeBufferReady()) { + return true; + } + + event = Event.NEEDS_DATA; + // Seek past any remaining bytes from the previous value. + long targetIndex; + if (parent.endIndex == DELIMITED_MARKER) { + // The e-expression is not length-prefixed and its end has not previously been calculated. Just seek + // to the end of the current child value. The caller ensures that this is the last value in the expression. + targetIndex = valueMarker.endIndex; + } else { + targetIndex = parent.endIndex; + } + if (targetIndex > peekIndex) { + if (targetIndex > limit) { + if (slowSeek(targetIndex - offset)) { + return true; + } + } else { + peekIndex = targetIndex; + } + } else if (targetIndex == DELIMITED_MARKER && valueTid != null) { + seekPastDelimitedContainer_1_1(); + return event == Event.NEEDS_DATA; + } + return false; } /** - * Steps out of an e-expression, restoring the context of the parent container (if any). + * Seeks to the end of the value on which the cursor is positioned (if any), or to the end of the e-expression + * (if known). Note: it is up to the caller to ensure either that the e-expression is buffered in its entirety + * and either 1) it is at its end when this method is called, or 2) the end of the e-expression is known due to + * being length-prefixed or previously calculated. */ - void stepOutOfEExpression() { + private void uncheckedSeekToEndOfEExpression() { + if (parent.endIndex == DELIMITED_MARKER) { + if (valueMarker.endIndex > peekIndex) { + peekIndex = valueMarker.endIndex; + } else if (valueMarker.endIndex == DELIMITED_MARKER && valueTid != null) { + seekPastDelimitedContainer_1_1(); + } + } else { + peekIndex = parent.endIndex; + } + } + + /** + * Steps out of an e-expression, restoring the context of the parent container (if any). Note: it is up to the + * caller to ensure either that either 1) the e-expression is at its end when this method is called, or 2) the end + * of the e-expression is known due to being length-prefixed or previously calculated. This method will skip any + * bytes remaining in the value on which it is currently positioned, then step out of the e-expression. It will + * *not* attempt to interpret the e-expression's signature and attempt to find its actual end. + * @return NEEDS_INSTRUCTION if successful, or NEEDS_DATA if more data is required to skip past the value on which + * the cursor is positioned (if any). + */ + Event stepOutOfEExpression() { if (parent == null) { throw new IonException("Cannot step out at the top level."); } if (!parent.typeId.isMacroInvocation) { throw new IonException("Not positioned within an e-expression."); } - // TODO support early step-out when support for lazy parsing of e-expressions is added (including continuable - // reading). - if (valueMarker.endIndex > peekIndex) { - peekIndex = valueMarker.endIndex; - } - setCheckpointBeforeUnannotatedTypeId(); - if (--containerIndex >= 0) { - parent = containerStack[containerIndex]; + if (isSlowMode) { + if (slowSeekToEndOfEExpression()) { + return event; + } + slowPopContainer(); } else { - parent = null; - containerIndex = -1; + uncheckedSeekToEndOfEExpression(); + uncheckedPopContainer(); } - valueTid = null; - event = Event.NEEDS_INSTRUCTION; + return event; } /** @@ -2737,6 +3021,29 @@ private void resumeSlowMode() { isSlowMode = true; } + /** + * Pops from the container stack and resets the cursor's state as necessary, including resuming slow mode if + * the cursor's buffer is not known to extend to the remaining values at the stepped-out depth. + */ + private void uncheckedPopContainer() { + setCheckpointBeforeUnannotatedTypeId(); + if (--containerIndex >= 0) { + parent = containerStack[containerIndex]; + if (refillableState != null && containerIndex < refillableState.fillDepth) { + resumeSlowMode(); + } + } else { + parent = null; + containerIndex = -1; + if (refillableState != null) { + resumeSlowMode(); + } + } + valueTid = null; + event = Event.NEEDS_INSTRUCTION; + hasAnnotations = false; + } + @Override public Event stepOutOfContainer() { if (isSlowMode) { @@ -2756,25 +3063,25 @@ public Event stepOutOfContainer() { } else { peekIndex = parent.endIndex; } - if (!isSlowMode) { - setCheckpointBeforeUnannotatedTypeId(); - } + uncheckedPopContainer(); + return event; + } + + /** + * Pops from the container stack and resets the cursor's state as necessary. This method is called when `isSlowMode` + * is true. Otherwise, {@link #uncheckedPopContainer()} must be called. + */ + private void slowPopContainer() { + setCheckpointBeforeUnannotatedTypeId(); if (--containerIndex >= 0) { parent = containerStack[containerIndex]; - if (refillableState != null && containerIndex < refillableState.fillDepth) { - resumeSlowMode(); - } } else { parent = null; containerIndex = -1; - if (refillableState != null) { - resumeSlowMode(); - } } - - valueTid = null; event = Event.NEEDS_INSTRUCTION; - return event; + valueTid = null; + hasAnnotations = false; } /** @@ -2804,16 +3111,7 @@ private Event slowStepOutOfContainer() { } peekIndex = offset; } - setCheckpointBeforeUnannotatedTypeId(); - if (--containerIndex >= 0) { - parent = containerStack[containerIndex]; - } else { - parent = null; - containerIndex = -1; - } - event = Event.NEEDS_INSTRUCTION; - valueTid = null; - hasAnnotations = false; + slowPopContainer(); return event; } @@ -2822,11 +3120,11 @@ private Event slowStepOutOfContainer() { * @return true if the end of the container has been reached; otherwise, false. */ private boolean uncheckedNextContainedToken() { - if (parent.endIndex == DELIMITED_MARKER) { - return uncheckedIsDelimitedEnd_1_1(); - } else if (parent.endIndex == peekIndex) { + if (parent.endIndex == peekIndex) { event = Event.END_CONTAINER; return true; + } else if (parent.endIndex == DELIMITED_MARKER || parent.typeId.isDelimited) { + return uncheckedIsDelimitedEnd_1_1(); } else if (parent.endIndex < peekIndex) { throw new IonException("Contained values overflowed the parent container length."); } else if (parent.typeId.type == IonType.STRUCT) { @@ -2871,7 +3169,11 @@ private void reportSkippedData() { */ private boolean uncheckedNextToken() { if (peekIndex < valueMarker.endIndex) { + // TODO length-prefixed macro invocations currently follow this path. However, such invocations may + // expand to system values and need to be checked before being skipped over. peekIndex = valueMarker.endIndex; + } else if (macroInvocationId >= 0) { + skipMacroInvocation(); } else if (valueTid != null && valueTid.isDelimited) { seekPastDelimitedContainer_1_1(); } @@ -2986,25 +3288,24 @@ private void slowNextToken() { * @return true if not enough data was available in the stream; otherwise, false. */ private boolean slowSkipRemainingValueBytes() { - // TODO this needs to be updated ot handle the case where the value is a non-prefixed macro invocation, - // as the length of these invocations is unknown to the cursor. Input from the macro evaluator is needed. - if (valueMarker.endIndex == DELIMITED_MARKER && valueTid != null && valueTid.isDelimited) { - seekPastDelimitedContainer_1_1(); - if (event == Event.NEEDS_DATA) { + if (macroInvocationId >= 0 && valueMarker.endIndex == DELIMITED_MARKER) { + if (skipMacroInvocation()) { return true; } - } else if (refillableState.pinOffset > -1) { - // Bytes in the buffer are being pinned, so buffer the remaining bytes instead of seeking past them. - if (!fillAt(refillableState.pinOffset, valueMarker.endIndex - refillableState.pinOffset)) { + } else if (valueMarker.endIndex == DELIMITED_MARKER && valueTid != null && valueTid.isDelimited) { + seekPastDelimitedContainer_1_1(); + if (event == Event.NEEDS_DATA) { return true; } - offset = valueMarker.endIndex; } else if (limit >= valueMarker.endIndex) { offset = valueMarker.endIndex; - } else if (slowSeek(valueMarker.endIndex - offset)) { - return true; + peekIndex = valueMarker.endIndex; + } else { + if (slowSeek(valueMarker.endIndex - offset)) { + return true; + } + peekIndex = valueMarker.endIndex; } - peekIndex = offset; valuePreHeaderIndex = peekIndex; if (refillableState.fillDepth > containerIndex) { // This value was filled, but was skipped. Reset the fillDepth so that the reader does not think the @@ -3369,7 +3670,11 @@ private void setCheckpointAfterValueHeader() { break; case NEEDS_INSTRUCTION: // A macro invocation header has just been read. - setCheckpoint(CheckpointLocation.BEFORE_UNANNOTATED_TYPE_ID); + // Note: e-expression checkpoints are currently treated the same as container literal checkpoints. + // this could be changed in the future, if necessary, by adding a distinct CheckpointLocation for + // AFTER_E_EXPRESSION_HEADER, and potentially a distinct Event type for START_E_EXPRESSION. We will + // defer this work until we find it's necessary. + setCheckpoint(CheckpointLocation.AFTER_CONTAINER_HEADER); break; default: throw new IllegalStateException(); @@ -3483,14 +3788,10 @@ public Event nextGroupedValue() { * @return true if there was not enough data to complete the seek; otherwise, false. */ private boolean seekToEndOfArgumentGroupPage(ArgumentGroupMarker group) { - if (isSlowMode) { - if (slowSeek(group.pageEndIndex - offset)) { - return true; - } - peekIndex = offset; - } else { - peekIndex = group.pageEndIndex; + if (isSlowMode && slowSeek(group.pageEndIndex - offset)) { + return true; } + peekIndex = group.pageEndIndex; return false; } @@ -3573,10 +3874,6 @@ private Event exitTaglessArgumentGroup(ArgumentGroupMarker group) { */ public Event exitArgumentGroup() { ArgumentGroupMarker group = argumentGroupStack[argumentGroupIndex]; - if (group.pageEndIndex >= 0 && peekIndex >= group.pageEndIndex) { - event = Event.NEEDS_INSTRUCTION; - return event; - } event = Event.NEEDS_DATA; if (group.taglessEncoding == null) { return exitTaggedArgumentGroup(group); diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java index 1a0d6f62c..18f9f2549 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableApplicationBinary.java @@ -970,6 +970,11 @@ public Event nextValue() { return event; } + @Override + protected boolean evaluateUserMacroInvocations() { + return true; + } + @Override public SymbolTable getSymbolTable() { if (cachedReadOnlySymbolTable == null) { diff --git a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java index 90ae2fb7a..86234061a 100644 --- a/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java +++ b/src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java @@ -5,7 +5,6 @@ import com.amazon.ion.Decimal; import com.amazon.ion.IntegerSize; import com.amazon.ion.IonBufferConfiguration; -import com.amazon.ion.IonCursor; import com.amazon.ion.IonException; import com.amazon.ion.IonReader; import com.amazon.ion.IonType; @@ -1209,9 +1208,7 @@ protected void installSymbols(List newSymbols) { } } - /** - * @return the {@link EncodingContext} currently active, or {@code null}. - */ + @Override EncodingContext getEncodingContext() { return encodingContext; } @@ -1733,16 +1730,7 @@ protected Macro loadMacro() { @Override protected PresenceBitmap loadPresenceBitmapIfNecessary(List signature) { - PresenceBitmap presenceBitmap = new PresenceBitmap(); - presenceBitmap.initialize(signature); - if (presenceBitmap.getByteSize() > 0) { - if (fillArgumentEncodingBitmap(presenceBitmap.getByteSize()) == IonCursor.Event.NEEDS_DATA) { - throw new UnsupportedOperationException("TODO: support continuable parsing of AEBs."); - } - presenceBitmap.readFrom(buffer, (int) valueMarker.startIndex); - presenceBitmap.validate(); - } - return presenceBitmap; + return IonReaderContinuableCoreBinary.this.loadPresenceBitmap(signature); } @Override @@ -1788,6 +1776,7 @@ protected void stepIntoEExpression() { @Override protected void stepOutOfEExpression() { + validateValueEndIndex(parent.endIndex); IonReaderContinuableCoreBinary.super.stepOutOfEExpression(); } } @@ -1975,6 +1964,15 @@ private void transcodeValueLiteral() throws IOException { } } + /** + * @return true if the reader should evaluate user macro invocations; otherwise false. Should be overridden by + * subclasses that support user macros. + */ + protected boolean evaluateUserMacroInvocations() { + // The core-level reader does not evaluate macro invocations. + return false; + } + @Override public Event nextValue() { lobBytesRead = 0; @@ -2007,15 +2005,7 @@ public Event nextValue() { event = super.nextValue(); } if (valueTid != null && valueTid.isMacroInvocation) { - if (encodingContext == EncodingContext.getDefault() && !isSystemInvocation()) { - // If the macro evaluator is null, it means there is no active macro table. Do not attempt evaluation, - // but allow the user to do a raw read of the parameters if this is a core-level reader. - // TODO this is used in the tests for the core binary reader. If it cannot be useful elsewhere, remove - // and refactor the tests. - if (this instanceof IonReaderContinuableApplicationBinary) { - throw new IonException("The user-level binary reader encountered a macro invocation without an active macro table."); - } - } else { + if (evaluateUserMacroInvocations() || isSystemInvocation()) { expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator); isEvaluatingEExpression = true; if (evaluateNext()) { @@ -2055,6 +2045,8 @@ public Event stepIntoContainer() { @Override public Event stepOutOfContainer() { if (isEvaluatingEExpression) { + // TODO step out of the macro evaluator only if it's in a container. Otherwise, finish the evaluation early + // and step out of the parent. macroEvaluatorIonReader.stepOut(); event = Event.NEEDS_INSTRUCTION; return event; diff --git a/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java b/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java index 05a9c7354..e96f1eab5 100644 --- a/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java +++ b/src/main/java/com/amazon/ion/impl/macro/EExpressionArgsReader.java @@ -252,16 +252,16 @@ protected void readValueAsExpression(boolean isImplicitRest, List expressions) { Macro macro = loadMacro(); + stepIntoEExpression(); List signature = macro.getSignature(); PresenceBitmap presenceBitmap = loadPresenceBitmapIfNecessary(signature); int invocationStartIndex = expressions.size(); expressions.add(Expression.Placeholder.INSTANCE); int numberOfParameters = signature.size(); - stepIntoEExpression(); for (int i = 0; i < numberOfParameters; i++) { readParameter( signature.get(i), - presenceBitmap == null ? 0 : presenceBitmap.get(i), + presenceBitmap == null ? PresenceBitmap.EXPRESSION : presenceBitmap.get(i), expressions, i == (numberOfParameters - 1) ); diff --git a/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java index f5bdc19e6..7e35cff89 100644 --- a/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java +++ b/src/test/java/com/amazon/ion/impl/EncodingDirectiveCompilationTest.java @@ -47,6 +47,7 @@ import java.util.SortedMap; import java.util.TreeMap; import java.util.function.Consumer; +import java.util.function.Supplier; import static com.amazon.ion.BitUtils.bytes; import static org.hamcrest.MatcherAssert.assertThat; @@ -1589,6 +1590,181 @@ public void taglessExpressionGroup(InputType inputType, StreamType streamType) t } } + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void taglessExpressionGroupInDelimitedStruct(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "foo", "value"); + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "foo"); + writeMacroSignatureFromDatagram(writer, symbols, LOADER.load("uint8::value '*'")); + writeVariableExpansion(writer, symbols, "value"); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro expectedMacro = new TemplateMacro( + Collections.singletonList(new Macro.Parameter("value", Macro.ParameterEncoding.Uint8, Macro.ParameterCardinality.ZeroOrMore)), + Collections.singletonList(new Expression.VariableRef(0)) + ); + + writer.stepInStruct(false); + { + writer.writeFieldName(SystemSymbols_1_1.EMPTY_TEXT); + writer.stepInEExp(0, false, expectedMacro); + { + writer.stepInExpressionGroup(false); + { + writer.writeInt(1); + writer.writeInt(2); + } + writer.stepOut(); + } + writer.stepOut(); + } + writer.stepOut(); + + byte[] data = getBytes(writer, out); + + IonReaderBuilder incrementalReaderBuilder = IonReaderBuilder.standard().withIncrementalReadingEnabled(true); + Supplier incrementalReaderSupplier = () -> inputType == InputType.BYTE_ARRAY + ? incrementalReaderBuilder.build(data) + : incrementalReaderBuilder.build(new ByteArrayInputStream(data)); + + // Fully traverse the value + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + assertEquals(IonType.INT, reader.next()); + assertEquals(1, reader.intValue()); + assertEquals(IonType.INT, reader.next()); + assertEquals(2, reader.intValue()); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + } + + // Skip by early-step out + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + reader.stepOut(); + assertNull(reader.next()); + } + + // Skip by step-over + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + assertNull(reader.next()); + } + } + + @ParameterizedTest(name = "{0},{1}") + @MethodSource("allCombinations") + public void taglessParametersInDelimitedStruct(InputType inputType, StreamType streamType) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IonRawWriter_1_1 writer = streamType.newWriter(out); + + writer.writeIVM(); + Map symbols = initializeSymbolTable(writer, "foo", "u8", "i16", "u32", "i64"); + startModuleDirectiveForDefaultModule(writer); + startMacroTable(writer); + startMacro(writer, symbols, "foo"); + writeMacroSignatureFromDatagram( + writer, + symbols, + LOADER.load("uint8::u8"), + LOADER.load("int16::i16"), + LOADER.load("uint32::u32"), + LOADER.load("int64::i64") + ); + writer.stepInList(true); + writeVariableExpansion(writer, symbols, "u8"); + writeVariableExpansion(writer, symbols, "i16"); + writeVariableExpansion(writer, symbols, "u32"); + writeVariableExpansion(writer, symbols, "i64"); + writer.stepOut(); + endMacro(writer); + endMacroTable(writer); + endEncodingDirective(writer); + + Macro expectedMacro = new TemplateMacro( + Arrays.asList( + new Macro.Parameter("u8", Macro.ParameterEncoding.Uint8, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("i16", Macro.ParameterEncoding.Int16, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("u32", Macro.ParameterEncoding.Uint32, Macro.ParameterCardinality.ExactlyOne), + new Macro.Parameter("i64", Macro.ParameterEncoding.Int64, Macro.ParameterCardinality.ExactlyOne) + ), + Arrays.asList( + new Expression.ListValue(Collections.emptyList(), 0, 5), + new Expression.VariableRef(0), + new Expression.VariableRef(1), + new Expression.VariableRef(2), + new Expression.VariableRef(3) + ) + ); + + writer.stepInStruct(false); + { + writer.writeFieldName(SystemSymbols_1_1.EMPTY_TEXT); + writer.stepInEExp(0, false, expectedMacro); + { + writer.writeInt(1); + writer.writeInt(2); + writer.writeInt(3); + writer.writeInt(4); + } + writer.stepOut(); + } + writer.stepOut(); + byte[] data = getBytes(writer, out); + + IonReaderBuilder incrementalReaderBuilder = IonReaderBuilder.standard().withIncrementalReadingEnabled(true); + Supplier incrementalReaderSupplier = () -> inputType == InputType.BYTE_ARRAY + ? incrementalReaderBuilder.build(data) + : incrementalReaderBuilder.build(new ByteArrayInputStream(data)); + + // Fully traverse the value + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + assertEquals(IonType.LIST, reader.next()); + assertEquals("", reader.getFieldName()); + reader.stepIn(); + assertEquals(IonType.INT, reader.next()); + assertEquals(1, reader.intValue()); + assertEquals(IonType.INT, reader.next()); + assertEquals(2, reader.intValue()); + assertEquals(IonType.INT, reader.next()); + assertEquals(3, reader.intValue()); + assertEquals(IonType.INT, reader.next()); + assertEquals(4, reader.intValue()); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + reader.stepOut(); + assertNull(reader.next()); + } + + // Skip by early-step out + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + reader.stepIn(); + reader.stepOut(); + assertNull(reader.next()); + } + + // Skip by step-over + try (IonReader reader = incrementalReaderSupplier.get()) { + assertEquals(IonType.STRUCT, reader.next()); + assertNull(reader.next()); + } + } + private static void writeSymbolTableEExpression(boolean isAppend, IonRawWriter_1_1 writer, String... symbols) { writer.stepInEExp(isAppend ? SystemMacro.AddSymbols : SystemMacro.SetSymbols); writer.stepInExpressionGroup(false); diff --git a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java index c9dc13093..d6efcb35f 100644 --- a/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonCursorBinaryTest.java @@ -952,6 +952,32 @@ static ExpectationProvider nextMacroInvocation(in )); } + /** + * Provides Expectations that step the reader into the macro invocation on which it is currently positioned. + */ + static ExpectationProvider stepInMacroInvocation() { + return consumer -> consumer.accept(new Expectation<>( + "stepIn macro invocation", + cursor -> { + cursor.stepIntoEExpression(); + assertEquals(NEEDS_INSTRUCTION, cursor.getCurrentEvent()); + } + )); + } + + /** + * Provides Expectations that step the reader out of the macro invocation that it is currently reading. + */ + static ExpectationProvider stepOutMacroInvocation() { + return consumer -> consumer.accept(new Expectation<>( + "stepOut macro invocation", + cursor -> { + cursor.stepOutOfEExpression(); + assertEquals(NEEDS_INSTRUCTION, cursor.getCurrentEvent()); + } + )); + } + /** * Provides Expectations that advance the reader to the next tagless value and verify that it has the given * attributes. @@ -1066,12 +1092,14 @@ public void taglessInts(boolean constructFromBytes) throws Exception { assertSequence( cursor, nextMacroInvocation(0), + stepInMacroInvocation(), nextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), nextTaglessValue(TaglessEncoding.INT16, IonType.INT, 6, 8), nextTaglessValue(TaglessEncoding.UINT32, IonType.INT, 8, 12), nextTaglessValue(TaglessEncoding.INT64, IonType.INT, 12, 20), nextTaglessValue(TaglessEncoding.FLEX_UINT, IonType.INT, 20, 23), nextTaglessValue(TaglessEncoding.FLEX_INT, IonType.INT, 23, 26), + stepOutMacroInvocation(), endStream() ); } @@ -1090,9 +1118,11 @@ public void taglessFloats(boolean constructFromBytes) throws Exception { assertSequence( cursor, nextMacroInvocation(0), + stepInMacroInvocation(), nextTaglessValue(TaglessEncoding.FLOAT16, IonType.FLOAT, 5, 7), nextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), nextTaglessValue(TaglessEncoding.FLOAT64, IonType.FLOAT, 11, 19), + stepOutMacroInvocation(), endStream() ); } @@ -1112,9 +1142,11 @@ public void taglessCompactSymbols(boolean constructFromBytes) throws Exception { assertSequence( cursor, nextMacroInvocation(0), + stepInMacroInvocation(), nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 6, 10), nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 10, 11), nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 13, 13), + stepOutMacroInvocation(), endStream() ); } @@ -1140,12 +1172,14 @@ public void taglessValuesInterspersedWithTaggedValues(boolean constructFromBytes assertSequence( cursor, nextMacroInvocation(0), + stepInMacroInvocation(), nextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), nextTaggedValue(IonType.INT, 7, 7), nextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), nextTaggedValue(IonType.FLOAT, 12, 16), nextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 17, 21), nextTaggedValue(IonType.SYMBOL, 22, 26), + stepOutMacroInvocation(), endStream() ); } @@ -1159,12 +1193,14 @@ public void fillTaglessValuesInterspersedWithTaggedValues(boolean constructFromB assertSequence( cursor, nextMacroInvocation(0), + stepInMacroInvocation(), fillNextTaglessValue(TaglessEncoding.UINT8, IonType.INT, 5, 6), fillScalar(7, 7), type(IonType.INT), fillNextTaglessValue(TaglessEncoding.FLOAT32, IonType.FLOAT, 7, 11), fillScalar(12, 16), type(IonType.FLOAT), fillNextTaglessValue(TaglessEncoding.FLEX_SYM, IonType.SYMBOL, 17, 21), fillScalar(22, 26), type(IonType.SYMBOL), + stepOutMacroInvocation(), endStream() ); } @@ -1241,7 +1277,7 @@ private static void executeIncrementally(byte[] data, List instruct public void fillTaglessValuesInterspersedWithTaggedValuesIncremental() throws Exception { byte[] data = taggedAndTaglessValues(); List instructions = Arrays.asList( - instruction(IonCursorBinary::nextValue, macroInvocation(0)), + instruction(IonCursorBinary::nextValue, allOf(macroInvocation(0), stepInMacroInvocation())), instruction( cursor -> cursor.nextTaglessValue(TaglessEncoding.UINT8), valueMarker(IonType.INT, 5, 6) @@ -1291,7 +1327,15 @@ public void fillTaglessValuesInterspersedWithTaggedValuesIncremental() throws Ex valueReady(IonType.SYMBOL, 22, 26) ), // This is the end of the stream, so the response is not used. - instruction(IonCursorBinary::nextValue, null) + instruction( + cursor -> { + if (cursor.stepOutOfEExpression() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.nextValue(); + }, + null + ) ); executeIncrementally(data, instructions); } @@ -1300,7 +1344,7 @@ public void fillTaglessValuesInterspersedWithTaggedValuesIncremental() throws Ex public void skipTaglessValuesInterspersedWithTaggedValuesIncremental() throws Exception { byte[] data = taggedAndTaglessValues(); List instructions = Arrays.asList( - instruction(IonCursorBinary::nextValue, macroInvocation(0)), + instruction(IonCursorBinary::nextValue, allOf(macroInvocation(0), stepInMacroInvocation())), instruction( cursor -> cursor.nextTaglessValue(TaglessEncoding.UINT8), // 0xFF is skipped. @@ -1333,7 +1377,15 @@ public void skipTaglessValuesInterspersedWithTaggedValuesIncremental() throws Ex valueMarker(IonType.SYMBOL, 9, 13) ), // This is the end of the stream, so the response is not used. - instruction(IonCursorBinary::nextValue, null) + instruction( + cursor -> { + if (cursor.stepOutOfEExpression() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.nextValue(); + }, + null + ) ); executeIncrementally(data, instructions); } @@ -1394,8 +1446,10 @@ private static void assertAEBThenIntZero(byte[] data, boolean constructFromBytes assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(numberOfBytesInAEB, 5, expectedAEBEndIndex), nextTaggedValue(IonType.INT, expectedAEBEndIndex + 1, expectedAEBEndIndex + 1), + stepOutMacroInvocation(), endStream() ); } @@ -1406,11 +1460,19 @@ private static void assertAEBThenIntZeroIncremental(byte[] data, int numberOfByt // the AEB starts at index 5. int expectedAEBEndIndex = 5 + numberOfBytesInAEB; List instructions = Arrays.asList( - instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(IonCursorBinary::nextValue, allOf(macroInvocation(0x13), stepInMacroInvocation())), instruction(cursor -> cursor.fillArgumentEncodingBitmap(numberOfBytesInAEB), valueMarker(null, 5, expectedAEBEndIndex)), instruction(IonCursorBinary::nextValue, valueMarker(IonType.INT, expectedAEBEndIndex + 1, expectedAEBEndIndex + 1)), // This is the end of the stream, so the response is not used. - instruction(IonCursorBinary::nextValue, null) + instruction( + cursor -> { + if (cursor.stepOutOfEExpression() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.nextValue(); + }, + null + ) ); executeIncrementally(data, instructions); } @@ -1496,12 +1558,14 @@ public void fullyTraverseTaglessArgumentGroup(boolean constructFromBytes) throws assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaglessArgumentGroup(TaglessEncoding.UINT8), nextGroupedValue(IonType.INT, 7, 8), nextGroupedValue(IonType.INT, 9, 10), endOfGroup(), exitArgumentGroup(), + stepOutMacroInvocation(), endStream() ); } @@ -1528,6 +1592,7 @@ public void fullyTraverseTaggedPrefixedArgumentGroup(boolean constructFromBytes) assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaggedArgumentGroup(), nextGroupedValue(IonType.INT, 8, 8), @@ -1538,6 +1603,7 @@ public void fullyTraverseTaggedPrefixedArgumentGroup(boolean constructFromBytes) stepOutOfContainer(), endOfGroup(), exitArgumentGroup(), + stepOutMacroInvocation(), endStream() ); } @@ -1565,6 +1631,7 @@ public void fullyTraverseTaggedDelimitedArgumentGroup(boolean constructFromBytes assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaggedArgumentGroup(), nextGroupedValue(IonType.INT, 8, 8), @@ -1575,6 +1642,7 @@ public void fullyTraverseTaggedDelimitedArgumentGroup(boolean constructFromBytes stepOutOfContainer(), endOfGroup(), exitArgumentGroup(), + stepOutMacroInvocation(), endStream() ); } @@ -1600,6 +1668,7 @@ public void emptyArgumentGroups(boolean constructFromBytes) throws Exception { assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaggedArgumentGroup(), endOfGroup(), @@ -1607,6 +1676,7 @@ public void emptyArgumentGroups(boolean constructFromBytes) throws Exception { enterTaglessArgumentGroup(TaglessEncoding.UINT8), endOfGroup(), exitArgumentGroup(), + stepOutMacroInvocation(), endStream() ); } @@ -1643,6 +1713,7 @@ public void twoArgumentGroupsFollowedBySingleValue(boolean constructFromBytes) t assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaggedArgumentGroup(), nextGroupedValue(IonType.INT, 8, 8), @@ -1658,6 +1729,7 @@ public void twoArgumentGroupsFollowedBySingleValue(boolean constructFromBytes) t nextGroupedValue(IonType.INT, 16, 17), endOfGroup(), exitArgumentGroup(), + stepOutMacroInvocation(), container( scalar(), valueMarker(IonType.INT, 20, 20) ), @@ -1670,7 +1742,7 @@ public void twoArgumentGroupsFollowedBySingleValue(boolean constructFromBytes) t public void twoArgumentGroupsFollowedBySingleValueIncremental() throws Exception { byte[] data = twoArgumentGroupsFollowedBySingleValue(); List instructions = Arrays.asList( - instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(IonCursorBinary::nextValue, allOf(macroInvocation(0x13), stepInMacroInvocation())), instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), @@ -1719,6 +1791,9 @@ public void twoArgumentGroupsFollowedBySingleValueIncremental() throws Exception ), instruction( cursor -> { + if (cursor.stepOutOfEExpression() == NEEDS_DATA) { + return NEEDS_DATA; + } if (cursor.nextValue() == NEEDS_DATA) { return NEEDS_DATA; } @@ -1751,12 +1826,14 @@ public void skipOverArgumentGroups(boolean constructFromBytes) throws Exception assertSequence( cursor, nextMacroInvocation(0x13), valueMarker(null, 5, -1), + stepInMacroInvocation(), fillArgumentEncodingBitmap(1, 5, 6), enterTaggedArgumentGroup(), nextGroupedValue(IonType.INT, 8, 8), exitArgumentGroup(), // Early exit enterTaglessArgumentGroup(TaglessEncoding.UINT8), exitArgumentGroup(), // Skip over group + stepOutMacroInvocation(), container( scalar(), valueMarker(IonType.INT, 20, 20) ), @@ -1769,7 +1846,7 @@ public void skipOverArgumentGroups(boolean constructFromBytes) throws Exception public void skipOverArgumentGroupsIncremental() throws Exception { byte[] data = twoArgumentGroupsFollowedBySingleValue(); List instructions = Arrays.asList( - instruction(IonCursorBinary::nextValue, macroInvocation(0x13)), + instruction(IonCursorBinary::nextValue, allOf(macroInvocation(0x13), stepInMacroInvocation())), instruction(cursor -> cursor.fillArgumentEncodingBitmap(1), valueMarker(null, 5, 6)), instruction(IonCursorBinary::enterTaggedArgumentGroup, event(NEEDS_INSTRUCTION)), instruction(IonCursorBinary::nextGroupedValue, valueMarker(IonType.INT, 8, 8)), @@ -1777,14 +1854,79 @@ public void skipOverArgumentGroupsIncremental() throws Exception { instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), instruction(cursor -> cursor.enterTaglessArgumentGroup(TaglessEncoding.UINT8), event(NEEDS_INSTRUCTION)), // Skip all arguments in the group - instruction(IonCursorBinary::exitArgumentGroup, event(NEEDS_INSTRUCTION)), - instruction(IonCursorBinary::nextValue, valueMarker(IonType.LIST, 16, 17)), + instruction( + cursor -> { + if (cursor.exitArgumentGroup() == NEEDS_DATA) { + return NEEDS_DATA; + } + return cursor.stepOutOfEExpression(); + }, + event(NEEDS_INSTRUCTION) + ), + instruction(IonCursorBinary::nextValue, valueMarker(IonType.LIST, 14, 15)), // This is the end of the stream, so the response is not used. instruction(IonCursorBinary::nextValue, null) ); executeIncrementally(data, instructions); } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutPrefixedMacroInvocation(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "F5 | Prefixed macro invocation \n" + + "09 | Address 4 (repeat) \n" + + "05 | Length 2 \n" + + "00 | AEB 0 \n" + + "60 | 0 \n" + + "93 61 62 63 | \"abc\" \n" + ))); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertEquals(NEEDS_INSTRUCTION, cursor.nextValue()); + cursor.stepIntoEExpression(); + cursor.stepOutOfEExpression(); + assertEquals(START_SCALAR, cursor.nextValue()); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutMacroInvocationWithTaglessArgument(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "F9 6E 61 6D 65 | FlexSym with inline text \"name\" \n" + + "60 | 0 \n" + ))); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertEquals(NEEDS_INSTRUCTION, cursor.nextValue()); + cursor.stepIntoEExpression(); + cursor.nextTaglessValue(TaglessEncoding.FLEX_SYM); + cursor.stepOutOfEExpression(); + assertEquals(START_SCALAR, cursor.nextValue()); + assertEquals(IonType.INT, cursor.valueTid.type); + } + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutMacroInvocationWithDelimitedArgument(boolean constructFromBytes) throws Exception { + byte[] data = withIvm(1, hexStringToByteArray(cleanCommentedHexBytes( + "13 | Opcode 0x13 -> macro ID 0x13 \n" + + "F1 60 F0 | Delimited list [0] \n" + + "93 61 62 63 | \"abc\" \n" + ))); + try (IonCursorBinary cursor = initializeCursor(STANDARD_BUFFER_CONFIGURATION, constructFromBytes, data)) { + assertEquals(NEEDS_INSTRUCTION, cursor.nextValue()); + cursor.stepIntoEExpression(); + assertEquals(START_CONTAINER, cursor.nextValue()); + cursor.stepOutOfEExpression(); + assertEquals(START_SCALAR, cursor.nextValue()); + assertEquals(IonType.STRING, cursor.valueTid.type); + } + } + + // TODO Nest argument groups >8 deep, exercising argument group stack growth. // TODO Add more incremental tests for various argument group combinations, improving coverage of NEEDS_DATA cases. // TODO Extend a tagged prefixed argument group page beyond the current buffer limit. In slow mode, this should diff --git a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java index a1585efba..a15fba49c 100644 --- a/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java +++ b/src/test/java/com/amazon/ion/impl/IonReaderContinuableTopLevelBinaryTest.java @@ -6111,5 +6111,345 @@ public void readIon11SymbolTableWithFlexUIntFieldNames(boolean constructFromByte closeAndCount(); } + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void skipTopLevelMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0x04, 0x00, // Macro invocation 4 (repeat), AEB 0 + 0x60, // 0 + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + // Note: because the macro expands to nothing, it requires only one next() to position on the value that + // follows. + assertSequence( + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void skipTopLevelPrefixedMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF5, 0x09, 0x05, 0x00, // Macro invocation, address 4 (repeat), length 2, AEB 0 + 0x60, // 0 + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + // Note: because the macro expands to nothing, it requires only one next() to position on the value that + // follows. + assertSequence( + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void topLevelPrefixedMacroInvocationIsInvalidDueToLengthMismatch(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF5, 0x09, 0x0B, // Macro invocation, address 4 (repeat), length 5 (past the end of the invocation) + 0x00, // AEB 0, indicating that the second argument isn't present, leaving only one argument. + 0x60, // 0, the argument + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertThrows(IonException.class, () -> reader.next()); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutDelimitedStructThatContainsMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x04, 0x00, // Macro invocation 4 (repeat), AEB 0 + 0x60, // 0 + 0x01, 0xF0 // Delimited struct end + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + STEP_IN, + STEP_OUT, + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutDelimitedStructThatContainsPrefixedMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0xF5, 0x09, 0x05, 0x00, // Macro invocation, address 4 (repeat), length 2, AEB 0 + 0x60, // 0 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + STEP_IN, + STEP_OUT, + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void stepOverDelimitedStructThatContainsMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x04, 0x00, // Macro invocation 4 (repeat), AEB 0 + 0x60, // 0 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void traverseDelimitedStructThatContainsMacroInvocation(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x04, 0x00, // Macro invocation 4 (repeat), AEB 0 + 0x60, // 0 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + STEP_IN, + next(null), // This is (:repeat 0), which expands to nothing. + STEP_OUT, + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void stepOverDelimitedStructThatContainsMacroInvocationWithTaggedExpressionGroup(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x06, 0x02, // Macro invocation 6 (delta), AEB 2 (group) + 0x0D, // 6-byte expression group + 0x61, 0x01, // 1 + 0x61, 0x02, // 2 + 0x61, 0x03, // 3 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void earlyStepOutDelimitedStructThatContainsMacroInvocationWithTaggedExpressionGroup(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x06, 0x02, // Macro invocation 6 (delta), AEB 2 (group) + 0x0D, // 6-byte expression group + 0x61, 0x01, // 1 + 0x61, 0x02, // 2 + 0x61, 0x03, // 3 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + STEP_IN, + STEP_OUT, + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + // TODO add a test that modifies the above to step out after beginning macro evaluation but before exhausting it. + // This should end evaluation early and step out of the struct. + + @ParameterizedTest(name = "constructFromBytes={0}") + @ValueSource(booleans = {true, false}) + public void traverseDelimitedStructThatContainsMacroInvocationWithTaggedExpressionGroup(boolean constructFromBytes) throws Exception { + reader = readerForIon11(bytes( + 0xF3, // Delimited struct start + 0xFF, 'a', // FlexSym field name 'a' + 0x06, 0x02, // Macro invocation 6 (delta), AEB 2 (group) + 0x0D, // 6-byte expression group + 0x61, 0x01, // 1 + 0x61, 0x02, // 2 + 0x61, 0x03, // 3 + 0x01, 0xF0, // Delimited struct end + 0x93, 'a', 'b', 'c' + ), + constructFromBytes + ); + assertSequence( + next(IonType.STRUCT), + STEP_IN, + next(IonType.INT), + bigIntegerValue(BigInteger.valueOf(1)), // Note: the current implementation of 'delta' operates on BigInteger only. + next(IonType.INT), + bigIntegerValue(BigInteger.valueOf(3)), + next(IonType.INT), + bigIntegerValue(BigInteger.valueOf(6)), + next(null), + STEP_OUT, + next(IonType.STRING), + stringValue("abc"), + next(null) + ); + closeAndCount(); + } + + @Test + public void fillNestedDelimitedLists() throws Exception { + readerBuilder = readerBuilder.withBufferConfiguration( + IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(6) // This ensures the entire delimited container is not fully buffered up front. + .onData(byteCountingHandler) + .build() + ); + reader = readerForIon11(bytes( + 0xF1, 0xF1, 0xF1, 0xF0, 0xF0, 0xF0 + ), + false + ); + assertSequence( + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null) + ); + closeAndCount(); + } + + @Test + public void fillNestedEmptyDelimitedList() throws Exception { + readerBuilder = readerBuilder.withBufferConfiguration( + IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(7) // This ensures the entire delimited container is not fully buffered up front. + .onData(byteCountingHandler) + .build() + ); + reader = readerForIon11(bytes( + 0xF1, 0xF1, 0xF0, 0xF0, + 0x60 + ), + false + ); + assertSequence( + next(IonType.LIST), + next(IonType.INT), + next(null) + ); + closeAndCount(); + } + + @Test + public void fillMultipleNestedDelimitedLists() throws Exception { + readerBuilder = readerBuilder.withBufferConfiguration( + IonBufferConfiguration.Builder.standard() + .withInitialBufferSize(7) // This ensures the entire delimited container is not fully buffered up front. + .onData(byteCountingHandler) + .build() + ); + reader = readerForIon11(bytes( + 0xF2, + 0xF1, 0xF1, 0xF1, 0xF0, 0xF0, 0xF0, + 0xF1, 0xF1, 0xF1, 0xF0, 0xF0, 0xF0, + 0xF0 + ), + false + ); + assertSequence( + next(IonType.SEXP), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(IonType.LIST), + STEP_IN, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null), + STEP_OUT, + next(null) + ); + closeAndCount(); + } + + // TODO test continuable reading and skipping of non-prefixed macro invocations. + // TODO test skipping macro invocations (especially length-prefixed ones) that expand to system values. // TODO Ion 1.1 symbol tables with all kinds of annotation encodings (opcodes E4 - E9, inline and SID) }