Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support writing datetime columns #120

Merged
merged 13 commits into from
Jun 8, 2022
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- automatically detect driver for `.geojson`, `.geojsonl` and `.geojsons` files (#101)
- read DateTime fields with millisecond accuracy (#111)
- support writing object columns with np.nan values (#118)
- support writing datetime columns (#120)

### Breaking changes

Expand Down
14 changes: 8 additions & 6 deletions docs/source/known_issues.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,16 @@ Pyogrio does not currently validate attribute values or geometry types before
attempting to write to the output file. Invalid types may crash during writing
with obscure error messages.

Date fields are not yet fully supported. These will be supported in a future
release.

## Support for reading and writing DateTimes

Currently only reading datetime values is supported.

GDAL only supports datetimes at a millisecond resolution. Reading data will thus
give at most millisecond resolution (`datetime64[ms]` data type), even though
the data is cast to the `datetime64[ns]` data type when reading into a data frame
using `pyogrio.read_dataframe()`.
using `pyogrio.read_dataframe()`. When writing, only precision up to ms is retained.

Not all file formats (for example ESRI Shapefile) have dedicated support for
storing datetime data. For such formats, or if you require a precision finer than
milliseconds, a workaround is to convert the datetimes to strings.

Timezone information is ignored at the moment, both when reading and when writing
datetime columns.
42 changes: 39 additions & 3 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@ DTYPE_OGR_FIELD_TYPES = {

'float32': (OFTReal,OFSTFloat32),
'float': (OFTReal, OFSTNone),
'float64': (OFTReal, OFSTNone)
}
'float64': (OFTReal, OFSTNone),

'datetime64[D]': (OFTDate, OFSTNone),
'datetime64': (OFTDateTime, OFSTNone),
}


cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
Expand Down Expand Up @@ -1122,7 +1124,12 @@ cdef infer_field_types(list dtypes):
field_types_view[i, 0] = OFTString
field_types_view[i, 2] = int(dtype.itemsize // 4)

# TODO: datetime types
elif dtype.name.startswith("datetime64"):
# datetime dtype precision is specified with e.g. [ms], but this isn't
# useful when writing to GDAL.
field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"]
field_types_view[i, 0] = field_type
field_types_view[i, 1] = field_subtype

else:
raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})")
Expand Down Expand Up @@ -1388,6 +1395,35 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields,
elif field_type == OFTReal:
OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)

elif field_type == OFTDate:
datetime = field_value.item()
OGR_F_SetFieldDateTimeEx(
ogr_feature,
field_idx,
datetime.year,
datetime.month,
datetime.day,
0,
0,
0.0,
0
)

elif field_type == OFTDateTime:
# TODO: add support for timezones
datetime = field_value.astype("datetime64[ms]").item()
OGR_F_SetFieldDateTimeEx(
ogr_feature,
field_idx,
datetime.year,
datetime.month,
datetime.day,
datetime.hour,
datetime.minute,
datetime.second + datetime.microsecond / 10**6,
0
)

else:
raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")

Expand Down
10 changes: 10 additions & 0 deletions pyogrio/_ogr.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,16 @@ cdef extern from "ogr_api.h":
void OGR_F_SetFieldString(OGRFeatureH feature, int n, char *value)
void OGR_F_SetFieldBinary(OGRFeatureH feature, int n, int l, unsigned char *value)
void OGR_F_SetFieldNull(OGRFeatureH feature, int n) # new in GDAL 2.2
void OGR_F_SetFieldDateTimeEx(
OGRFeatureH hFeat,
int iField,
int nYear,
int nMonth,
int nDay,
int nHour,
int nMinute,
float fSecond,
int nTZFlag)
OGRErr OGR_F_SetGeometryDirectly(OGRFeatureH feature, OGRGeometryH geometry)

OGRFeatureDefnH OGR_FD_Create(const char *name)
Expand Down
40 changes: 40 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,46 @@ def test_read_write_data_types_numeric(tmp_path, ext):
assert result.dtype == result_dtype


def test_read_write_datetime(tmp_path):
    """Round-trip datetime columns of several resolutions through a GeoPackage.

    Columns with day, second, millisecond and nanosecond resolution are
    written with ``write`` and read back with ``read``. Values that fit in
    millisecond resolution must come back equal to the input; the column
    carrying sub-millisecond digits is compared against the input cast to
    ``datetime64[ms]``.
    """
    field_data = [
        np.array(["2005-02-01", "2005-02-02"], dtype="datetime64[D]"),
        np.array(["2001-01-01T12:00", "2002-02-03T13:56:03"], dtype="datetime64[s]"),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ms]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072"], dtype="datetime64[ns]"
        ),
        np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072123456"],
            dtype="datetime64[ns]",
        ),
    ]
    fields = [
        "datetime64_d",
        "datetime64_s",
        "datetime64_ms",
        "datetime64_ns",
        "datetime64_precise_ns",
    ]

    # Point(0, 0)
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 2, dtype=object
    )
    meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)

    filename = tmp_path / "test.gpkg"
    write(filename, geometry, field_data, fields, **meta)
    # index 3 of the tuple returned by read() holds the per-field data arrays
    result = read(filename)[3]

    for idx, field in enumerate(fields):
        expected = field_data[idx]
        if field == "datetime64_precise_ns":
            # GDAL stores datetimes at millisecond resolution, so the
            # sub-millisecond digits do not survive the round trip
            expected = expected.astype("datetime64[ms]")
        assert np.array_equal(result[idx], expected)


def test_read_data_types_numeric_with_null(test_gpkg_nulls):
fields = read(test_gpkg_nulls)[3]

Expand Down