Skip to content

Commit

Permalink
Support writing datetime columns (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
theroggy authored Jun 8, 2022
1 parent e4f7fc6 commit 99d382e
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- automatically detect driver for `.geojson`, `.geojsonl` and `.geojsons` files (#101)
- read DateTime fields with millisecond accuracy (#111)
- support writing object columns with np.nan values (#118)
- support writing datetime columns (#120)

### Breaking changes

Expand Down
14 changes: 8 additions & 6 deletions docs/source/known_issues.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,16 @@ Pyogrio does not currently validate attribute values or geometry types before
attempting to write to the output file. Invalid types may crash during writing
with obscure error messages.

Date fields are not yet fully supported. These will be supported in a future
release.

## Support for reading and writing DateTimes

Currently only reading datetime values is supported.

GDAL only supports datetimes at a millisecond resolution. Reading data will thus
give at most millisecond resolution (`datetime64[ms]` data type), even though
the data is cast to the `datetime64[ns]` data type when reading into a data frame
using `pyogrio.read_dataframe()`.
using `pyogrio.read_dataframe()`. When writing, only precision up to ms is retained.

Not all file formats (for example ESRI Shapefile) have dedicated support for
storing datetime data. For such formats, or if you require a precision finer
than milliseconds, a workaround is to convert the datetimes to strings.

Timezone information is ignored at the moment, both when reading and when writing
datetime columns.
42 changes: 39 additions & 3 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,11 @@ DTYPE_OGR_FIELD_TYPES = {

'float32': (OFTReal,OFSTFloat32),
'float': (OFTReal, OFSTNone),
'float64': (OFTReal, OFSTNone)
}
'float64': (OFTReal, OFSTNone),

'datetime64[D]': (OFTDate, OFSTNone),
'datetime64': (OFTDateTime, OFSTNone),
}


cdef int start_transaction(OGRDataSourceH ogr_dataset, int force) except 1:
Expand Down Expand Up @@ -1122,7 +1124,12 @@ cdef infer_field_types(list dtypes):
field_types_view[i, 0] = OFTString
field_types_view[i, 2] = int(dtype.itemsize // 4)

# TODO: datetime types
elif dtype.name.startswith("datetime64"):
# the datetime dtype name carries its precision, e.g. [ms], but that
# precision isn't useful when writing to GDAL.
field_type, field_subtype = DTYPE_OGR_FIELD_TYPES["datetime64"]
field_types_view[i, 0] = field_type
field_types_view[i, 1] = field_subtype

else:
raise NotImplementedError(f"field type is not supported {dtype.name} (field index: {i})")
Expand Down Expand Up @@ -1388,6 +1395,35 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields,
elif field_type == OFTReal:
OGR_F_SetFieldDouble(ogr_feature, field_idx, field_value)

elif field_type == OFTDate:
datetime = field_value.item()
OGR_F_SetFieldDateTimeEx(
ogr_feature,
field_idx,
datetime.year,
datetime.month,
datetime.day,
0,
0,
0.0,
0
)

elif field_type == OFTDateTime:
# TODO: add support for timezones
datetime = field_value.astype("datetime64[ms]").item()
OGR_F_SetFieldDateTimeEx(
ogr_feature,
field_idx,
datetime.year,
datetime.month,
datetime.day,
datetime.hour,
datetime.minute,
datetime.second + datetime.microsecond / 10**6,
0
)

else:
raise NotImplementedError(f"OGR field type is not supported for writing: {field_type}")

Expand Down
10 changes: 10 additions & 0 deletions pyogrio/_ogr.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,16 @@ cdef extern from "ogr_api.h":
void OGR_F_SetFieldString(OGRFeatureH feature, int n, char *value)
void OGR_F_SetFieldBinary(OGRFeatureH feature, int n, int l, unsigned char *value)
void OGR_F_SetFieldNull(OGRFeatureH feature, int n) # new in GDAL 2.2
void OGR_F_SetFieldDateTimeEx(
OGRFeatureH hFeat,
int iField,
int nYear,
int nMonth,
int nDay,
int nHour,
int nMinute,
float fSecond,
int nTZFlag)
OGRErr OGR_F_SetGeometryDirectly(OGRFeatureH feature, OGRGeometryH geometry)

OGRFeatureDefnH OGR_FD_Create(const char *name)
Expand Down
40 changes: 40 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,46 @@ def test_read_write_data_types_numeric(tmp_path, ext):
assert result.dtype == result_dtype


def test_read_write_datetime(tmp_path):
    """Round-trip datetime64 columns of several precisions through a GPKG file.

    Every column is expected to come back equal to what was written, except
    the column holding sub-millisecond values, which is compared against the
    written values cast to ``datetime64[ms]``.
    """
    timestamps = ["2001-01-01T12:00", "2002-02-03T13:56:03.072"]

    # Column name -> values to write; insertion order is the column order.
    columns = {
        "datetime64_d": np.array(["2005-02-01", "2005-02-02"], dtype="datetime64[D]"),
        "datetime64_s": np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03"], dtype="datetime64[s]"
        ),
        "datetime64_ms": np.array(timestamps, dtype="datetime64[ms]"),
        "datetime64_ns": np.array(timestamps, dtype="datetime64[ns]"),
        "datetime64_precise_ns": np.array(
            ["2001-01-01T12:00", "2002-02-03T13:56:03.072123456"],
            dtype="datetime64[ns]",
        ),
    }
    names = list(columns)
    written = list(columns.values())

    # Two features, each carrying the WKB encoding of Point(0, 0).
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_wkb] * 2, dtype=object)

    out_path = tmp_path / "test.gpkg"
    write(
        out_path,
        geometry,
        written,
        names,
        geometry_type="Point",
        crs="EPSG:4326",
        spatial_index=False,
    )

    # Item 3 of the tuple returned by read() is indexed per field below.
    read_back = read(out_path)[3]
    for idx, (name, values) in enumerate(zip(names, written)):
        if name == "datetime64_precise_ns":
            # only millisecond precision is expected to survive the round trip
            expected = values.astype("datetime64[ms]")
        else:
            expected = values
        assert np.array_equal(read_back[idx], expected)


def test_read_data_types_numeric_with_null(test_gpkg_nulls):
fields = read(test_gpkg_nulls)[3]

Expand Down

0 comments on commit 99d382e

Please sign in to comment.