Skip to content

Commit

Permalink
Preserve the multi-line record if it's broken in the middle of the line
Browse files Browse the repository at this point in the history
  • Loading branch information
chaochenq committed Mar 5, 2018
1 parent 830ef04 commit 881d0e0
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ private int advanceBufferToNextPattern(ByteBuffer buffer) {
}

// We've scanned to the end and there is only one complete record in the buffer; set the position to the end.
if (!firstLine && buffer.limit() < buffer.capacity()) {
if (!firstLine && buffer.limit() < buffer.capacity()
&& buffer.position() > 0
&& buffer.get(buffer.position() - 1) == SingleLineSplitter.LINE_DELIMITER) {
return buffer.position();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
import static org.testng.Assert.assertTrue;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;

Expand Down Expand Up @@ -450,6 +454,18 @@ public void testParsingRecordsOnRegexFromGenerator() throws IOException {
Path testFile = testFiles.createTempFile();
RecordGenerator generator = new RecordGenerator(true);
int expectedRecordCount = generator.appendDataToFile(testFile, getTestBytes());

// Append a record that does not end with a newline. This incomplete record should be ignored.
FileChannel channel = FileChannel.open(testFile, StandardOpenOption.WRITE, StandardOpenOption.APPEND);
String incompleteRecord =
String.format("%s\t%010d",
SimpleDateFormat.getDateTimeInstance().format(new Date()),
TestUtils.uniqueCounter()) +
generator.getRecordTerminator() +
"aaaaa";
channel.write(ByteBuffer.wrap(incompleteRecord.getBytes()));
channel.force(true);

P parser = buildParser();
parser = spy(parser);
TrackedFile file = new TrackedFile(flow, testFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ protected List<String> getRegexParsedExpectedRecords(List<String> records, Strin
String recordPattern;
String recordData;
while((recordPattern = scanner.findWithinHorizon(pattern, 0)) != null){
if (scanner.hasNext() && (recordData = scanner.next()) != null) {
if (scanner.hasNext() && (recordData = scanner.next()) != null && recordData.endsWith("\n")) {
records.add(recordPattern + recordData);
}
}
Expand Down

0 comments on commit 881d0e0

Please sign in to comment.