Skip to content

Commit

Permalink
Refactor FST.saveMetadata() to FSTMetadata.save() (apache#13549)
Browse files Browse the repository at this point in the history
* lazily write the FST padding byte

* Also write the pad byte when there is emptyOutput

* add comment

* Make Lucene90BlockTreeTermsWriter to write FST off-heap

* Add change log

* Tidy code & Add comments

* use temp IndexOutput for FST writing

* Use IOUtils to delete files

* Update CHANGES.txt

* Update CHANGES.txt
  • Loading branch information
dungba88 authored Jul 22, 2024
1 parent af9a2b9 commit 97d89c6
Showing 1 changed file with 55 additions and 47 deletions.
102 changes: 55 additions & 47 deletions lucene/core/src/java/org/apache/lucene/util/fst/FST.java
Original file line number Diff line number Diff line change
Expand Up @@ -528,56 +528,10 @@ public FSTMetadata<T> getMetadata() {
* @param out the DataOutput to write the FST bytes to
*/
public void save(DataOutput metaOut, DataOutput out) throws IOException {
// NOTE(review): this view is a rendered diff without +/- markers. Going by the commit
// title ("Refactor FST.saveMetadata() to FSTMetadata.save()"), the first call below
// looks like the removed line and the second like its replacement -- confirm against
// the actual file; only one of the two should write the metadata to metaOut.
saveMetadata(metaOut);
metadata.save(metaOut);
// After the metadata, the FST bytes are written to the second output.
fstReader.writeTo(out);
}

/**
 * Writes the FST metadata to {@code metaOut}.
 *
 * <p>Written in order: the codec header, a flag byte (1 when an empty-string output exists,
 * otherwise 0), then, only when flagged, the vInt length followed by the byte-reversed
 * serialized empty output, then one byte for the input type (0 for BYTE1, 1 for BYTE2, 2
 * otherwise), the start node as a vLong and finally {@code numBytes()} as a vLong.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException if writing to {@code metaOut} fails
 */
public void saveMetadata(DataOutput metaOut) throws IOException {
  CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
  // TODO: really we should encode this as an arc, arriving
  // to the root node, instead of special casing here:
  if (metadata.emptyOutput == null) {
    // No empty-string output to record.
    metaOut.writeByte((byte) 0);
  } else {
    // Accepts empty string
    metaOut.writeByte((byte) 1);

    // Serialize the empty-string output into a scratch buffer first.
    ByteBuffersDataOutput scratch = new ByteBuffersDataOutput();
    outputs.writeFinalOutput(metadata.emptyOutput, scratch);
    byte[] serialized = scratch.toArrayCopy();
    int length = serialized.length;

    // Reverse the bytes in place by swapping each index with its mirror.
    final int half = length / 2;
    for (int i = 0; i < half; i++) {
      final int mirror = length - i - 1;
      final byte swapped = serialized[i];
      serialized[i] = serialized[mirror];
      serialized[mirror] = swapped;
    }
    metaOut.writeVInt(length);
    metaOut.writeBytes(serialized, 0, length);
  }

  // One-byte code for the input type: 0 = BYTE1, 1 = BYTE2, 2 = everything else.
  final byte typeCode =
      (byte)
          (metadata.inputType == INPUT_TYPE.BYTE1
              ? 0
              : (metadata.inputType == INPUT_TYPE.BYTE2 ? 1 : 2));
  metaOut.writeByte(typeCode);
  metaOut.writeVLong(metadata.startNode);
  metaOut.writeVLong(numBytes());
}

/** Writes an automaton to a file. */
public void save(final Path path) throws IOException {
try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) {
Expand Down Expand Up @@ -1249,5 +1203,59 @@ public FSTMetadata(
public int getVersion() {
  // Plain accessor: hands back the version field unchanged.
  return this.version;
}

/**
 * Returns the empty-string output stored in this metadata.
 *
 * @return the {@code emptyOutput} field as-is; it may be {@code null}, which {@code save}
 *     records as "no empty-string output"
 */
public T getEmptyOutput() {
  return this.emptyOutput;
}

/**
 * Returns the byte count stored in this metadata.
 *
 * @return the {@code numBytes} field, i.e. the value {@code save} writes as its final vLong
 */
public long getNumBytes() {
  return this.numBytes;
}

/**
 * Writes this metadata to {@code metaOut}.
 *
 * <p>Written in order: the codec header, a flag byte (1 when {@code emptyOutput} is non-null,
 * otherwise 0), then, only when flagged, the vInt length followed by the byte-reversed
 * serialized empty output, then one byte for the input type (0 for BYTE1, 1 for BYTE2, 2
 * otherwise), {@code startNode} as a vLong and finally {@code numBytes} as a vLong.
 *
 * @param metaOut the DataOutput to write the metadata to
 * @throws IOException if writing to {@code metaOut} fails
 */
public void save(DataOutput metaOut) throws IOException {
  CodecUtil.writeHeader(metaOut, FILE_FORMAT_NAME, VERSION_CURRENT);
  // TODO: really we should encode this as an arc, arriving
  // to the root node, instead of special casing here:
  if (emptyOutput == null) {
    // No empty-string output to record.
    metaOut.writeByte((byte) 0);
  } else {
    // Accepts empty string
    metaOut.writeByte((byte) 1);

    // Serialize the empty-string output into a temporary buffer.
    ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
    outputs.writeFinalOutput(emptyOutput, buffer);
    byte[] payload = buffer.toArrayCopy();
    int payloadLen = payload.length;

    // Reverse in place: walk inwards from both ends, swapping as we go.
    for (int lo = 0, hi = payloadLen - 1; lo < hi; lo++, hi--) {
      final byte held = payload[lo];
      payload[lo] = payload[hi];
      payload[hi] = held;
    }
    metaOut.writeVInt(payloadLen);
    metaOut.writeBytes(payload, 0, payloadLen);
  }

  // One-byte code for the input type: 0 = BYTE1, 1 = BYTE2, 2 = everything else.
  final byte typeCode =
      (byte) (inputType == INPUT_TYPE.BYTE1 ? 0 : (inputType == INPUT_TYPE.BYTE2 ? 1 : 2));
  metaOut.writeByte(typeCode);
  metaOut.writeVLong(startNode);
  metaOut.writeVLong(numBytes);
}
}
}

0 comments on commit 97d89c6

Please sign in to comment.