Skip to content

Commit

Permalink
Merge branch 'ion-11-encoding' into macro-evaluation-fixes-temp
Browse files Browse the repository at this point in the history
  • Loading branch information
popematt authored Dec 4, 2024
2 parents a1debeb + 44da868 commit 67ad344
Show file tree
Hide file tree
Showing 19 changed files with 1,180 additions and 260 deletions.
41 changes: 41 additions & 0 deletions src/main/java/com/amazon/ion/MacroAwareIonReader.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion

import java.io.Closeable
import java.io.IOException

/**
* An enhancement to an Ion reader that supports macro-aware transcoding.
*/
interface MacroAwareIonReader : Closeable {

/**
* Performs a macro-aware transcode of the stream being read by this reader.
* For Ion 1.0 streams, this functions similarly to providing a system-level
* [IonReader] to [IonWriter.writeValues]. For Ion 1.1 streams, the transcoded
* stream will include the same symbol tables, encoding directives, and
* e-expression invocations as the source stream. In both cases, the
* transcoded stream will be data-model equivalent to the source stream.
*
* The following limitations should be noted:
* 1. Encoding directives with no effect on the encoding context may be
* elided from the transcoded stream. An example would be an encoding
* directive that re-exports the existing context but adds no new
* macros or new symbols.
* 2. When transcoding from text to text, comments will not be preserved.
* 3. Open content in encoding directives (e.g. macro invocations that
* expand to nothing) will not be preserved.
* 4. Granular details of the binary encoding, like inlining vs. interning
* for a particular symbol or length-prefixing vs. delimiting for a
* particular container, may not be preserved. It is up to the user
* to provide a writer configured to match these details if important.
*
* To get a [MacroAwareIonReader] use `_Private_IonReaderBuilder.buildMacroAware`.
* To get a [MacroAwareIonWriter] use [IonEncodingVersion.textWriterBuilder] or
* [IonEncodingVersion.binaryWriterBuilder].
* @param writer the writer to which the reader's stream will be transcoded.
*/
@Throws(IOException::class)
fun transcodeTo(writer: MacroAwareIonWriter)
}
29 changes: 29 additions & 0 deletions src/main/java/com/amazon/ion/MacroAwareIonWriter.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,35 @@ import com.amazon.ion.impl.macro.*
*/
interface MacroAwareIonWriter : IonWriter {

/**
* Starts a new encoding segment with an Ion version marker, flushing
* the previous segment (if any) and resetting the encoding context.
*/
fun startEncodingSegmentWithIonVersionMarker()

/**
* Starts a new encoding segment with an encoding directive, flushing
* the previous segment (if any).
* @param macros the macros added in the new segment.
* @param isMacroTableAppend true if the macros from the previous segment
* are to remain available.
* @param symbols the symbols added in the new segment.
* @param isSymbolTableAppend true if the macros from the previous
* segment are to remain available.
* @param encodingDirectiveAlreadyWritten true if the encoding directive
* that begins the new segment has already been written to this writer.
* If false, the writer will write an encoding directive consistent
* with the arguments provided to this method, using verbose
* s-expression syntax.
*/
fun startEncodingSegmentWithEncodingDirective(
macros: Map<MacroRef, Macro>,
isMacroTableAppend: Boolean,
symbols: List<String>,
isSymbolTableAppend: Boolean,
encodingDirectiveAlreadyWritten: Boolean
)

/**
* Starts writing a macro invocation, adding it to the macro table, if needed.
*/
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/amazon/ion/impl/IonCursorBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,8 @@ private void uncheckedReadFieldName_1_1() {
fieldSid = (int) uncheckedReadFlexSym_1_1(fieldTextMarker);
} else {
fieldSid = (int) uncheckedReadFlexUInt_1_1();
fieldTextMarker.startIndex = -1;
fieldTextMarker.endIndex = fieldSid;
}
}
}
Expand Down Expand Up @@ -1794,6 +1796,8 @@ private boolean slowReadFieldName_1_1() {
return slowReadFieldNameFlexSym_1_1();
} else {
fieldSid = (int) slowReadFlexUInt_1_1();
fieldTextMarker.startIndex = -1;
fieldTextMarker.endIndex = fieldSid;
return fieldSid < 0;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -943,33 +943,6 @@ private enum State {
// The current state.
private State state = State.READING_VALUE;

/**
* @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise,
* false.
*/
boolean startsWithIonSymbolTable() {
if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) {
long savedPeekIndex = peekIndex;
peekIndex = annotationSequenceMarker.startIndex;
int sid = readVarUInt_1_0();
peekIndex = savedPeekIndex;
return ION_SYMBOL_TABLE_SID == sid;
} else if (minorVersion == 1) {
Marker marker = annotationTokenMarkers.get(0);
return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE);
}
return false;
}

/**
* @return true if the reader is positioned on a symbol table; otherwise, false.
*/
private boolean isPositionedOnSymbolTable() {
return hasAnnotations &&
super.getType() == IonType.STRUCT &&
startsWithIonSymbolTable();
}

@Override
public Event nextValue() {
Event event;
Expand Down
141 changes: 136 additions & 5 deletions src/main/java/com/amazon/ion/impl/IonReaderContinuableCoreBinary.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import com.amazon.ion.IonBufferConfiguration;
import com.amazon.ion.IonCursor;
import com.amazon.ion.IonException;
import com.amazon.ion.IonReader;
import com.amazon.ion.IonType;
import com.amazon.ion.MacroAwareIonReader;
import com.amazon.ion.MacroAwareIonWriter;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.Timestamp;
Expand All @@ -21,6 +24,7 @@
import com.amazon.ion.impl.macro.EncodingContext;
import com.amazon.ion.impl.macro.Expression;
import com.amazon.ion.impl.macro.EExpressionArgsReader;
import com.amazon.ion.impl.macro.IonReaderFromReaderAdapter;
import com.amazon.ion.impl.macro.Macro;
import com.amazon.ion.impl.macro.MacroCompiler;
import com.amazon.ion.impl.macro.MacroTable;
Expand All @@ -32,6 +36,7 @@
import com.amazon.ion.impl.macro.MacroRef;
import com.amazon.ion.impl.macro.SystemMacro;

import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
Expand All @@ -41,21 +46,22 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

import static com.amazon.ion.SystemSymbols.ION_ENCODING;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID;
import static com.amazon.ion.impl.IonReaderContinuableApplicationBinary.SYMBOLS_LIST_INITIAL_CAPACITY;
import static com.amazon.ion.impl.IonTypeID.SYSTEM_SYMBOL_VALUE;
import static com.amazon.ion.impl.bin.Ion_1_1_Constants.*;

/**
* An IonCursor capable of raw parsing of binary Ion streams.
*/
class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore {
class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReaderContinuableCore, MacroAwareIonReader {

// The UTF-8 bytes that represent the text "$ion_encoding" for quick byte-by-byte comparisons.
private static final byte[] ION_ENCODING_UTF8 = ION_ENCODING.getBytes(StandardCharsets.UTF_8);
Expand Down Expand Up @@ -132,6 +138,9 @@ class IonReaderContinuableCoreBinary extends IonCursorBinary implements IonReade
// Adapts this reader for use in code that supports multiple reader types.
private final ReaderAdapter readerAdapter = new ReaderAdapterContinuable(this);

// Adapts this reader for use in code that supports IonReader.
private final IonReader asIonReader = new IonReaderFromReaderAdapter(readerAdapter);

// Reads encoding directives from the stream.
private final EncodingDirectiveReader encodingDirectiveReader = new EncodingDirectiveReader();

Expand Down Expand Up @@ -1105,7 +1114,7 @@ boolean startsWithIonEncoding() {
public String getSymbol(int sid) {
// Only symbol IDs declared in Ion 1.1 encoding directives (not Ion 1.0 symbol tables) are resolved by the
// core reader. In Ion 1.0, 'symbols' is never populated by the core reader.
if (sid - 1 <= localSymbolMaxOffset) {
if (sid > 0 && sid - 1 <= localSymbolMaxOffset) {
return symbols[sid - 1];
}
return null;
Expand Down Expand Up @@ -1217,7 +1226,7 @@ private class EncodingDirectiveReader {
boolean isSymbolTableAppend = false;
boolean isMacroTableAppend = false;
List<String> newSymbols = new ArrayList<>(8);
Map<MacroRef, Macro> newMacros = new HashMap<>();
Map<MacroRef, Macro> newMacros = new LinkedHashMap<>();
MacroCompiler macroCompiler = new MacroCompiler(this::resolveMacro, readerAdapter);

boolean isSymbolTableAlreadyClassified = false;
Expand Down Expand Up @@ -1490,6 +1499,10 @@ void readEncodingDirective() {
state = State.COMPILING_MACRO;
Macro newMacro = macroCompiler.compileMacro();
newMacros.put(MacroRef.byId(++localMacroMaxOffset), newMacro);
String macroName = macroCompiler.getMacroName();
if (macroName != null) {
newMacros.put(MacroRef.byName(macroName), newMacro);
}
state = State.IN_MACRO_TABLE_SEXP;
break;
default:
Expand Down Expand Up @@ -1732,6 +1745,33 @@ protected void stepOutOfEExpression() {
}
}

/**
* @return true if current value has a sequence of annotations that begins with `$ion_symbol_table`; otherwise,
* false.
*/
protected boolean startsWithIonSymbolTable() {
if (minorVersion == 0 && annotationSequenceMarker.startIndex >= 0) {
long savedPeekIndex = peekIndex;
peekIndex = annotationSequenceMarker.startIndex;
int sid = readVarUInt_1_0();
peekIndex = savedPeekIndex;
return ION_SYMBOL_TABLE_SID == sid;
} else if (minorVersion == 1) {
Marker marker = annotationTokenMarkers.get(0);
return matchesSystemSymbol_1_1(marker, SystemSymbols_1_1.ION_SYMBOL_TABLE);
}
return false;
}

/**
* @return true if the reader is positioned on a symbol table; otherwise, false.
*/
protected boolean isPositionedOnSymbolTable() {
return hasAnnotations &&
getEncodingType() == IonType.STRUCT &&
startsWithIonSymbolTable();
}

/**
* Consumes the next value (if any) from the MacroEvaluator, setting `event` based on the result.
* @return true if evaluation of the current invocation has completed; otherwise, false.
Expand All @@ -1757,6 +1797,97 @@ private boolean evaluateNext() {
return false;
}

@Override
public void transcodeTo(MacroAwareIonWriter writer) throws IOException {
registerIvmNotificationConsumer((major, minor) -> {
resetEncodingContext();
// Which IVM to write is inherent to the writer implementation.
// We don't have a single implementation that writes both formats.
writer.startEncodingSegmentWithIonVersionMarker();
});
while (transcodeNextTo(writer) != Event.NEEDS_DATA);
}

/**
* Transcodes the next value, and any encoding directives that may precede it,
* to the given writer.
* @param writer the writer to which the value will be transcoded.
* @return the result of the operation.
* @throws IOException if thrown during writing.
*/
Event transcodeNextTo(MacroAwareIonWriter writer) throws IOException {
// NOTE: this method is structured very similarly to nextValue(). During performance analysis, we should
// see if the methods can be unified without sacrificing hot path performance. Performance of this method
// is not considered critical.
lobBytesRead = 0;
while (true) {
if (parent == null || state != State.READING_VALUE) {
if (state != State.READING_VALUE && state != State.COMPILING_MACRO) {
boolean isEncodingDirectiveFromEExpression = isEvaluatingEExpression;
encodingDirectiveReader.readEncodingDirective();
if (state != State.READING_VALUE) {
throw new IonException("Unexpected EOF when writing encoding-level value.");
}
// If the encoding directive was expanded from an e-expression, that expression has already been
// written. In that case, just make sure the writer is using the new context. Otherwise, also write
// the encoding directive.
writer.startEncodingSegmentWithEncodingDirective(
encodingDirectiveReader.newMacros,
encodingDirectiveReader.isMacroTableAppend,
encodingDirectiveReader.newSymbols,
encodingDirectiveReader.isSymbolTableAppend,
isEncodingDirectiveFromEExpression
);
}
if (isEvaluatingEExpression) {
if (evaluateNext()) {
continue;
}
} else {
event = super.nextValue();
}
if (minorVersion == 1 && parent == null && isPositionedOnEncodingDirective()) {
encodingDirectiveReader.resetState();
state = State.ON_ION_ENCODING_SEXP;
continue;
}
} else if (isEvaluatingEExpression) {
if (evaluateNext()) {
continue;
}
} else {
event = super.nextValue();
}
if (valueTid != null && valueTid.isMacroInvocation) {
expressionArgsReader.beginEvaluatingMacroInvocation(macroEvaluator);
macroEvaluatorIonReader.transcodeArgumentsTo(writer);
isEvaluatingEExpression = true;
if (evaluateNext()) {
continue;
}
if (parent == null && isPositionedOnEvaluatedEncodingDirective()) {
encodingDirectiveReader.resetState();
state = State.ON_ION_ENCODING_SEXP;
continue;
}
}
if (isEvaluatingEExpression) {
// EExpressions are not expanded and provided to the writer; only the raw encoding is transferred.
continue;
}
break;
}
if (event != Event.NEEDS_DATA) {
if (minorVersion > 0 && isPositionedOnSymbolTable()) {
// TODO finalize handling of Ion 1.0-style symbol tables in Ion 1.1: https://github.com/amazon-ion/ion-java/issues/1002
throw new IonException("Macro-aware transcoding of Ion 1.1 data containing Ion 1.0-style symbol tables not yet supported.");
}
// The reader is now positioned on an actual encoding value. Write the value.
writer.writeValue(asIonReader);
}
return event;
}

@Override
public Event nextValue() {
lobBytesRead = 0;
Expand Down Expand Up @@ -2510,7 +2641,7 @@ IntList getAnnotationSidList() {
* @return a SymbolToken.
*/
protected SymbolToken getSymbolToken(int sid) {
return new SymbolTokenImpl(sid);
return new SymbolTokenImpl(getSymbol(sid), sid);
}

protected final SymbolToken getSystemSymbolToken(Marker marker) {
Expand Down
14 changes: 13 additions & 1 deletion src/main/java/com/amazon/ion/impl/_Private_IonReaderBuilder.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package com.amazon.ion.impl;

import com.amazon.ion.IonCatalog;
import com.amazon.ion.IonException;
import com.amazon.ion.IonReader;
import com.amazon.ion.IonTextReader;
import com.amazon.ion.IonValue;
import com.amazon.ion.MacroAwareIonReader;
import com.amazon.ion.system.IonReaderBuilder;
import com.amazon.ion.util.IonStreamUtils;

Expand Down Expand Up @@ -352,4 +352,16 @@ public IonTextReader build(String ionText) {
return makeReaderText(validateCatalog(), ionText, lstFactory);
}

/**
* Creates a new {@link MacroAwareIonReader} over the given data.
* @param ionData the data to read.
* @return a new MacroAwareIonReader instance.
*/
public MacroAwareIonReader buildMacroAware(byte[] ionData) {
// TODO make this work for text too.
if (!IonStreamUtils.isIonBinary(ionData)) {
throw new UnsupportedOperationException("MacroAwareIonReader is not yet implemented for text data.");
}
return new IonReaderContinuableCoreBinary(getBufferConfiguration(), ionData, 0, ionData.length);
}
}
Loading

0 comments on commit 67ad344

Please sign in to comment.