Skip to content

Commit

Permalink
fix(rust,python): ensure trailing quote is written for temporal data when CSV `quote_style` is non-numeric (#11328)
Browse files Browse the repository at this point in the history

Co-authored-by: Stijn de Gooijer <[email protected]>
  • Loading branch information
alexander-beedie and stinodego authored Sep 26, 2023
1 parent 6b316ce commit 1f0450a
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 22 deletions.
13 changes: 7 additions & 6 deletions crates/polars-io/src/csv/write_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,19 @@ unsafe fn write_anyvalue(
},
_ => ndt.format(datetime_format),
};
return write!(f, "{formatted}").map_err(|_|{

let str_result = write!(f, "{formatted}");
if str_result.is_err() {
let datetime_format = unsafe { *datetime_formats.get_unchecked(i) };
let type_name = if tz.is_some() {
"DateTime"
} else {
"NaiveDateTime"
};
polars_err!(
ComputeError: "cannot format {} with format '{}'", type_name, datetime_format,
)
});
polars_bail!(
ComputeError: "cannot format {} with format '{}'", type_name, datetime_format,
)
};
str_result
},
#[cfg(feature = "dtype-time")]
AnyValue::Time(v) => {
Expand Down
56 changes: 40 additions & 16 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import textwrap
import zlib
from datetime import date, datetime, time, timedelta, timezone
from typing import TYPE_CHECKING, cast
from typing import TYPE_CHECKING, TypedDict, cast

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -1460,30 +1460,54 @@ def test_csv_9929() -> None:


def test_csv_quote_styles() -> None:
class TemporalFormats(TypedDict):
datetime_format: str
time_format: str

temporal_formats: TemporalFormats = {
"datetime_format": "%Y-%m-%dT%H:%M:%S",
"time_format": "%H:%M:%S",
}

dtm = datetime(2077, 7, 5, 3, 1, 0)
dt = dtm.date()
tm = dtm.time()

df = pl.DataFrame(
{
"float": [1.0, 2.0, None],
"string": ["a", "a,bc", '"hello'],
"int": [1, 2, 3],
"bool": [True, False, None],
"date": [dt, None, dt],
"datetime": [None, dtm, dtm],
"time": [tm, tm, None],
}
)

assert (
df.write_csv(quote_style="always")
== '"float","string","int","bool"\n"1.0","a","1","true"\n"2.0","a,bc","2","false"\n"","""hello","3",""\n'
)
assert (
df.write_csv(quote_style="necessary")
== 'float,string,int,bool\n1.0,a,1,true\n2.0,"a,bc",2,false\n,"""hello",3,\n'
)
assert (
df.write_csv(quote_style="never")
== 'float,string,int,bool\n1.0,a,1,true\n2.0,a,bc,2,false\n,"hello,3,\n'
)
assert (
df.write_csv(quote_style="non_numeric", quote="8")
== '8float8,8string8,8int8,8bool8\n1.0,8a8,1,8true8\n2.0,8a,bc8,2,8false8\n,8"hello8,3,\n'
assert df.write_csv(quote_style="always", **temporal_formats) == (
'"float","string","int","bool","date","datetime","time"\n'
'"1.0","a","1","true","2077-07-05","","03:01:00"\n'
'"2.0","a,bc","2","false","","2077-07-05T03:01:00","03:01:00"\n'
'"","""hello","3","","2077-07-05","2077-07-05T03:01:00",""\n'
)
assert df.write_csv(quote_style="necessary", **temporal_formats) == (
"float,string,int,bool,date,datetime,time\n"
"1.0,a,1,true,2077-07-05,,03:01:00\n"
'2.0,"a,bc",2,false,,2077-07-05T03:01:00,03:01:00\n'
',"""hello",3,,2077-07-05,2077-07-05T03:01:00,\n'
)
assert df.write_csv(quote_style="never", **temporal_formats) == (
"float,string,int,bool,date,datetime,time\n"
"1.0,a,1,true,2077-07-05,,03:01:00\n"
"2.0,a,bc,2,false,,2077-07-05T03:01:00,03:01:00\n"
',"hello,3,,2077-07-05,2077-07-05T03:01:00,\n'
)
assert df.write_csv(quote_style="non_numeric", quote="8", **temporal_formats) == (
"8float8,8string8,8int8,8bool8,8date8,8datetime8,8time8\n"
"1.0,8a8,1,8true8,82077-07-058,,803:01:008\n"
"2.0,8a,bc8,2,8false8,,82077-07-05T03:01:008,803:01:008\n"
',8"hello8,3,,82077-07-058,82077-07-05T03:01:008,\n'
)


Expand Down

0 comments on commit 1f0450a

Please sign in to comment.