Skip to content

Commit

Permalink
CSV module: restore 'yes'/'no' values for header option, handle explicit column names (#2376)
Browse files Browse the repository at this point in the history
  • Loading branch information
GuntherRademacher authored Feb 4, 2025
1 parent 36b6566 commit 1323ac8
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 43 deletions.
20 changes: 17 additions & 3 deletions basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.basex.io.parse.csv;

import static org.basex.query.value.type.SeqType.*;

import java.io.*;

import org.basex.build.csv.*;
Expand All @@ -12,6 +14,7 @@
import org.basex.query.value.*;
import org.basex.query.value.item.*;
import org.basex.util.*;
import org.basex.util.hash.*;
import org.basex.util.list.*;

/**
Expand Down Expand Up @@ -61,9 +64,8 @@ public abstract class CsvConverter extends Job {
* Returns a CSV converter for the given configuration.
* @param copts options
* @return CSV converter
* @throws QueryException query exception
*/
public static CsvConverter get(final CsvParserOptions copts) throws QueryException {
public static CsvConverter get(final CsvParserOptions copts) {
switch(copts.get(CsvOptions.FORMAT)) {
case XQUERY: return new CsvXQueryConverter(copts); // deprecated
case W3: return new CsvW3Converter(copts);
Expand All @@ -81,7 +83,19 @@ protected CsvConverter(final CsvParserOptions copts) {
this.copts = copts;
lax = copts.get(CsvOptions.LAX);
attributes = copts.get(CsvOptions.FORMAT) == CsvFormat.ATTRIBUTES;
skipEmpty = copts.get(CsvParserOptions.SKIP_EMPTY) && copts.get(CsvOptions.HEADER) != Bln.FALSE;
final Value header = copts.get(CsvOptions.HEADER);
skipEmpty = copts.get(CsvParserOptions.SKIP_EMPTY) && header != Bln.FALSE;
if(STRING_ZM.instance(header)) {
final TokenSet names = new TokenSet();
try {
for(final Item columnName : header) {
final byte[] token = columnName.string(null);
header(names.add(token) ? token : Token.EMPTY);
}
} catch(final QueryException ex) {
throw Util.notExpected(ex);
}
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ private static void add(final TokenBuilder entry, final int ch) {
private void record(final TokenBuilder entry, final boolean lastRow, final boolean lastField)
throws IOException {
final byte[] next = entry.next();
final byte[] field = trimWhitespace ? Token.trim(next) : next;
final byte[] field = trimWhitespace || !data ? Token.trim(next) : next;
if(field.length > 0 || !(first && lastField)) fields.add(field);
if(lastField && !(lastRow && fields.isEmpty())) {
if(data) conv.record();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,7 @@ protected Value finish(final InputInfo ii, final QueryContext qc) throws QueryEx
final XQMap map = (XQMap) super.finish(ii, qc);
Value columns = copts.get(CsvOptions.HEADER);
if(SeqType.BOOLEAN_O.instance(columns)) {
final Value names = map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii);
if(copts.get(CsvOptions.TRIM_WHITESPACE)) {
columns = names;
} else {
final ValueBuilder vb = new ValueBuilder(qc);
for(final Item name : names) {
vb.add(Str.get(Token.trim(name.string(ii))));
}
columns = vb.value();
}
columns = map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii);
}
final MapBuilder columnIndexBuilder = new MapBuilder();
int i = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

import org.basex.build.csv.*;
import org.basex.query.*;
import org.basex.query.value.*;
import org.basex.query.value.item.*;
import org.basex.query.value.node.*;
import org.basex.query.value.type.*;
import org.basex.util.*;
import org.basex.util.hash.*;

/**
* This class converts CSV data to XML according to the rules of fn:csv-to-xml.
Expand Down Expand Up @@ -41,18 +38,9 @@ public class CsvW3XmlConverter extends CsvConverter {
/**
* Constructor.
* @param copts CSV options
* @throws QueryException query exception
*/
public CsvW3XmlConverter(final CsvParserOptions copts) throws QueryException {
public CsvW3XmlConverter(final CsvParserOptions copts) {
super(copts);
final TokenSet names = new TokenSet();
final Value columns = copts.get(CsvOptions.HEADER);
if(!SeqType.BOOLEAN_O.instance(columns)) {
for(final Item columnName : columns) {
final byte[] token = columnName.string(null);
header(names.add(token) ? token : Token.EMPTY, false);
}
}
}

@Override
Expand All @@ -64,16 +52,7 @@ record = FElem.build(Q_FN_ROW);

@Override
public final void header(final byte[] value) {
header(value, true);
}

/**
* Adds a new header.
* @param value header value
* @param trim whether to trim the header value
*/
public final void header(final byte[] value, final boolean trim) {
headers.add(shared.token(trim ? Token.trim(value) : value));
headers.add(shared.token(value));
}

@Override
Expand Down
16 changes: 12 additions & 4 deletions basex-core/src/main/java/org/basex/query/func/fn/ParseCsv.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.basex.query.func.fn;

import static org.basex.query.QueryError.*;
import static org.basex.util.Token.*;

import java.io.*;

Expand Down Expand Up @@ -71,10 +72,17 @@ private Value parse(final QueryContext qc, final CsvFormat format, final byte[]
toOptions(arg(1), copts, qc);

// transfer to common CSV options instance
final CsvParserOptions cpopts = format == CsvFormat.W3 || format == CsvFormat.W3_XML ?
((CsvW3Options) copts).finish(info, format) : format == CsvFormat.W3_ARRAYS ?
((CsvW3ArraysOptions) copts).finish(info, format) :
(CsvParserOptions) copts;
final CsvParserOptions cpopts;
if(format == CsvFormat.W3 || format == CsvFormat.W3_XML || format == CsvFormat.W3_ARRAYS) {
cpopts = ((CsvW3ArraysOptions) copts).finish(info, format);
} else {
cpopts = (CsvParserOptions) copts;
final Value hdr = copts.get(CsvOptions.HEADER);
if(hdr instanceof Str) {
final Boolean b = Strings.toBoolean(string(((Str) hdr).string()));
if(b != null) copts.put(CsvOptions.HEADER, Bln.get(b));
}
}
if(format != null) cpopts.set(CsvOptions.FORMAT, format);

// convert data
Expand Down
63 changes: 61 additions & 2 deletions basex-core/src/test/java/org/basex/query/func/CsvModuleTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,28 @@ public final class CsvModuleTest extends SandboxTest {
parse(" ' \" X\"'", "'quotes': true()", "<csv><record><entry> \" X\"</entry></record></csv>");
parse(" '\"X \" '", "'quotes': true()", "<csv><record><entry>X </entry></record></csv>");

parse("X\nY", "'header': false(), 'format': 'direct'", "...<record><entry>X</entry></record>");
parse("X\nY", "'header': 'no', 'format': 'direct'", "...<record><entry>X</entry></record>");
parse("X\nY", "'header': '0', 'format': 'direct'", "...<record><entry>X</entry></record>");
parse("X\nY", "'header': true(), 'format': 'direct'", "<csv><record><X>Y</X></record></csv>");
parse("X\nY", "'header': 'yes', 'format': 'direct'", "<csv><record><X>Y</X></record></csv>");
parse("X\nY", "'header': '1', 'format': 'direct'", "<csv><record><X>Y</X></record></csv>");
parse("X\nY", "'header': '01', 'format': 'direct'", "...<record><_01>X</_01></record>");
parse("X\nY", "'header': '1.0', 'format': 'direct'", "...<record><_1.0>X</_1.0></record>");
parse("X\nY", "'header': ('yes', 'no'), 'format': 'direct'", "...<yes>X</yes>");
parse("X\nY", "'header': ' h ', 'format': 'direct'", "...<h>X</h>");

parse("X\nY", "'header': false(), 'format': 'attributes'", "...<entry>X</entry>");
parse("X\nY", "'header': 'no', 'format': 'attributes'", "...<record><entry>X</entry></record>");
parse("X\nY", "'header': '0', 'format': 'attributes'", "...<record><entry>X</entry></record>");
parse("X\nY", "'header': true(), 'format': 'attributes'", "...<entry name=\"X\">Y</entry>");
parse("X\nY", "'header': 'yes', 'format': 'attributes'", "...<entry name=\"X\">Y</entry>");
parse("X\nY", "'header': '1', 'format': 'attributes'", "...<entry name=\"X\">Y</entry>");
parse("X\nY", "'header': '01', 'format': 'attributes'", "...<entry name=\"01\">Y</entry>");
parse("X\nY", "'header': '1.0', 'format': 'attributes'", "...<entry name=\"1.0\">Y</entry>");
parse("X\nY", "'header': ('yes', 'no'), 'format': 'attributes'", "...name=\"yes\">Y<");
parse("X\nY", "'header': ' h ', 'format': 'attributes'", "...<entry name=\" h \">Y</entry>");

parseError("", "'x': 'y'");
parseError("", "'format': 'abc'");
parseError("", "'separator': ''");
Expand All @@ -73,9 +95,20 @@ public final class CsvModuleTest extends SandboxTest {

/** Test method. */
@Test public void parseXQuery() {
parse("X\nY", "'header': false(), 'format': 'xquery'", "...[\"X\"],[\"Y\"]");
parse("X\nY", "'header': false(), 'format': 'xquery'", "...\"records\":([\"X\"],[\"Y\"])");
parse("X\nY", "'header': false(), 'format': 'xquery'", "{\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': 'no', 'format': 'xquery'", "{\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': '0', 'format': 'xquery'", "{\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': true(), 'format': 'xquery'", "...\"names\":[\"X\"]");
parse("X\nY", "'header': 'yes', 'format': 'xquery'", "...\"names\":[\"X\"]");
parse("X\nY", "'header': '1', 'format': 'xquery'", "...\"names\":[\"X\"]");
parse("X\nY", "'header': '01', 'format': 'xquery'", "{\"names\":[\"01\"],"
+ "\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': '1.0', 'format': 'xquery'", "{\"names\":[\"1.0\"],"
+ "\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': ('yes', 'no'), 'format': 'xquery'", "{\"names\":[\"yes\",\"no\"],"
+ "\"records\":([\"X\"],[\"Y\"])}");
parse("X\nY", "'header': ' h ' , 'format': 'xquery'", "{\"names\":[\" h \"],"
+ "\"records\":([\"X\"],[\"Y\"])}");

parse("", "'format': 'xquery'", "{\"records\":()}");
// was: "{\"records\":()}");
Expand All @@ -98,6 +131,23 @@ public final class CsvModuleTest extends SandboxTest {

/** Test method. */
@Test public void serializeXml() {
final String xml = "<csv><record><entry name='X'>Y</entry></record></csv>";
serial(xml, "'header': false()", "Y\n");
serial(xml, "'header': 'no'", "Y\n");
serial(xml, "'header': '0'", "Y\n");
serial(xml, "'header': true()", "entry\nY\n");
serial(xml, "'header': 'yes'", "entry\nY\n");
serial(xml, "'header': '1'", "entry\nY\n");
serial(xml, "'header': 'x'", "Y\n");

serial(xml, "'format': 'attributes', 'header': false()", "Y\n");
serial(xml, "'format': 'attributes', 'header': 'no'", "Y\n");
serial(xml, "'format': 'attributes', 'header': '0'", "Y\n");
serial(xml, "'format': 'attributes', 'header': true()", "X\nY\n");
serial(xml, "'format': 'attributes', 'header': 'yes'", "X\nY\n");
serial(xml, "'format': 'attributes', 'header': '1'", "X\nY\n");
serial(xml, "'format': 'attributes', 'header': 'x'", "Y\n");

serial("<csv><record><A__>1</A__></record></csv>", "'header': true(), 'lax': false()",
"A_\n1\n");
serial("<csv><record><_>1</_></record></csv>", "'header': true(), 'lax': false()", "\n1\n");
Expand Down Expand Up @@ -156,6 +206,15 @@ public final class CsvModuleTest extends SandboxTest {
"'header': true(), 'format': 'xquery'", "A,B\n");
serial(" map { 'names': [ 'A' ], 'records': [ '1' ] }",
"'header': true(), 'format': 'xquery'", "A\n1\n");

final String map = "{'names': ['A', 'B'], 'records': (['X'], ['Y'])}";
serial(map, "'format': 'xquery', 'header': false()", "X\nY\n");
serial(map, "'format': 'xquery', 'header': 'no'", "X\nY\n");
serial(map, "'format': 'xquery', 'header': '0'", "X\nY\n");
serial(map, "'format': 'xquery', 'header': true()", "A,B\nX\nY\n");
serial(map, "'format': 'xquery', 'header': 'yes'", "A,B\nX\nY\n");
serial(map, "'format': 'xquery', 'header': '1'", "A,B\nX\nY\n");
serial(map, "'format': 'xquery', 'header': ('C', 'D')", "X\nY\n");
}

/**
Expand Down

0 comments on commit 1323ac8

Please sign in to comment.