Skip to content

Commit

Permalink
MLE-12358: (Technical) Apply fix on MLCP commons-csv to the latest 1.12 release
Browse files Browse the repository at this point in the history
  • Loading branch information
DarrenJAN committed Oct 28, 2024
1 parent 1c52ad3 commit b397c16
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 28 deletions.
15 changes: 15 additions & 0 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -2074,6 +2074,21 @@ public CSVParser parse(final Reader reader) throws IOException {
return new CSVParser(reader, this);
}

/**
 * Parses the content supplied by the given reader, creating the parser with an explicit
 * starting character offset, starting record number and character encoding.
 *
 * <p>
 * See also the various static parse methods on {@link CSVParser}.
 * </p>
 *
 * @param reader the reader supplying the CSV content to parse
 * @param characterOffset the character offset handed to the new parser
 * @param recordNumber the record number handed to the new parser
 * @param encoding the name of the character encoding handed to the new parser, for example
 *        {@code "UTF-8"}. NOTE(review): this appears to drive the parser's byte-position
 *        tracking and {@code null} appears to be accepted - confirm against {@link CSVParser}.
 * @return a parser over a stream of {@link CSVRecord}s.
 * @throws IOException If an I/O error occurs
 * @throws CSVException Thrown on invalid input.
 */
public CSVParser parse(final Reader reader, final long characterOffset, final long recordNumber, final String encoding)
        throws IOException {
    // Pure delegation: this format instance becomes the parser's format.
    return new CSVParser(reader, this, characterOffset, recordNumber, encoding);
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
this(reader, format, characterOffset, recordNumber, null);
}

public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset,final long recordNumber,
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
String encoding) throws IOException {
Objects.requireNonNull(reader, "reader");
Objects.requireNonNull(format, "format");
Expand Down
2 changes: 0 additions & 2 deletions src/main/java/org/apache/commons/csv/CSVRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.commons.csv;

import static org.apache.commons.csv.Constants.EMPTY_STRING_ARRAY;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
private long bytesRead;
/** Encoder used to calculate the bytes of characters */
CharsetEncoder encoder;

/**
* A flag to indicate if the read is a peek operation.
*/
Expand All @@ -77,7 +77,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
}
isReadPeek = false;
}

/**
* Closes the stream.
*
Expand Down Expand Up @@ -141,7 +141,7 @@ public int read() throws IOException {
lineNumber++;
}
if (encoder != null && !isReadPeek) {
this.bytesRead += getCharBytes(current);
this.bytesRead += getCharBytes(current);
}
lastChar = current;
position++;
Expand All @@ -161,8 +161,8 @@ public int read() throws IOException {
* - Consists of UTF-8 some 3-byte chars and 4-byte chars
*/
private long getCharBytes(int current) throws CharacterCodingException {
char cChar = (char)current;
char lChar = (char)lastChar;
char cChar = (char) current;
char lChar = (char) lastChar;
if (!Character.isSurrogate(cChar)) {
return encoder.encode(
CharBuffer.wrap(new char[] {cChar})).limit();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ long getCharacterPosition() {
long getBytesRead() {
return reader.getBytesRead();
}

/**
* Returns the current line number
*
Expand Down
22 changes: 10 additions & 12 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -695,21 +695,19 @@ public void testGetHeaderComment_NoComment3() throws IOException {

@Test
public void testGetRecordThreeBytesRead() throws Exception {
String code = "id,date,val5,val4\n"
+ "11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n"
+ "22222222222222,'4017-01-01',おはよう私の友人~,v4\n"
+ "33333333333333,'4017-01-01',きる自然の力ってすごいな~,v4\n";

String code = "id,date,val5,val4\n" +
"11111111111111,'4017-09-01',きちんと節分近くには咲いてる~,v4\n" +
"22222222222222,'4017-01-01',おはよう私の友人~,v4\n" +
"33333333333333,'4017-01-01',きる自然の力ってすごいな~,v4\n";
// final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
final CSVFormat format = CSVFormat.Builder.create()
.setDelimiter(',')
.setQuote('\'')
.build();
// CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");

CSVParser parser = format.parse(new StringReader(code), 0L, 1L, "UTF-8");

CSVRecord record;
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);
assertEquals(0, parser.getRecordNumber());
assertNotNull(record = parser.nextRecord());
assertEquals(1, record.getRecordNumber());
Expand Down Expand Up @@ -737,16 +735,16 @@ public void testGetRecordThreeBytesRead() throws Exception {

@Test
public void testGetRecordFourBytesRead() throws Exception {
String code = "id,a,b,c\n"
+ "1,😊,🤔,😂\n"
+ "2,😊,🤔,😂\n"
+ "3,😊,🤔,😂\n";
String code = "id,a,b,c\n" +
"1,😊,🤔,😂\n" +
"2,😊,🤔,😂\n" +
"3,😊,🤔,😂\n";
// final CSVFormat format = CSVFormat.newFormat(',').withQuote('\'');
final CSVFormat format = CSVFormat.Builder.create()
.setDelimiter(',')
.setQuote('\'')
.build();

// CSVParser parser = new CSVParser(new StringReader(code), format, 0L, 1L, "UTF-8");
CSVParser parser = format.parse(new StringReader(code), 0L, 1L, "UTF-8");

Expand Down
10 changes: 3 additions & 7 deletions src/test/java/org/apache/commons/csv/JiraCsv196Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@ public void parseThreeBytes() throws IOException {
// "org/apache/commons/csv/CSV-196/japanese.csv"), format, 0L, 1L, "UTF-8");
CSVParser parser = format.parse(getTestInput(
"org/apache/commons/csv/CSV-196/japanese.csv"), 0L, 1L, "UTF-8");


long[] charByteKey = {0, 89, 242, 395};
long[] charByteKey = {0, 89, 242, 395};
int idx = 0;
for (CSVRecord record : parser) {
assertEquals(charByteKey[idx++], record.getCharacterByte());
Expand All @@ -57,9 +55,7 @@ public void parseFourBytes() throws IOException {
.setDelimiter(',')
.setQuote('\'')
.build();

// CSVParser parser = new CSVParser(getTestInput(
// "org/apache/commons/csv/CSV-196/emoji.csv"), format, 0L, 1L, "UTF-8");

CSVParser parser = format.parse(getTestInput(
"org/apache/commons/csv/CSV-196/emoji.csv"), 0L, 1L, "UTF-8");

Expand All @@ -76,4 +72,4 @@ private Reader getTestInput(String path) {
return new InputStreamReader(
ClassLoader.getSystemClassLoader().getResourceAsStream(path));
}
}
}

0 comments on commit b397c16

Please sign in to comment.