From cc6cb939654e15e3cc8fea5147c83baed5c62e96 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 5 Sep 2024 11:03:11 +0200 Subject: [PATCH 01/27] BLD/RLS: fix aarch64 wheel building (#468) --- ...nylinux_2_28_aarch64-vcpkg-gdal.Dockerfile | 4 ---- ci/vcpkg_linux_crosscompiling.patch | 22 ------------------- 2 files changed, 26 deletions(-) delete mode 100644 ci/vcpkg_linux_crosscompiling.patch diff --git a/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile b/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile index 174b37a1..5a713eaf 100644 --- a/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile +++ b/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile @@ -26,10 +26,6 @@ RUN bootstrap-vcpkg.sh && \ COPY ci/custom-triplets/arm64-linux-dynamic-release.cmake opt/vcpkg/custom-triplets/arm64-linux-dynamic-release.cmake COPY ci/vcpkg.json opt/vcpkg/ -# temporary workaround for https://github.com/microsoft/vcpkg/issues/36094 -COPY ci/vcpkg_linux_crosscompiling.patch opt/vcpkg/ -RUN git -C /opt/vcpkg apply vcpkg_linux_crosscompiling.patch - ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/opt/vcpkg/installed/arm64-linux-dynamic-release/lib" RUN vcpkg install --overlay-triplets=opt/vcpkg/custom-triplets \ --feature-flags="versions,manifests" \ diff --git a/ci/vcpkg_linux_crosscompiling.patch b/ci/vcpkg_linux_crosscompiling.patch deleted file mode 100644 index 53a7a5fa..00000000 --- a/ci/vcpkg_linux_crosscompiling.patch +++ /dev/null @@ -1,22 +0,0 @@ -diff --git a/scripts/toolchains/linux.cmake b/scripts/toolchains/linux.cmake -index f22c71076..a68d6cbe8 100644 ---- a/scripts/toolchains/linux.cmake -+++ b/scripts/toolchains/linux.cmake -@@ -13,7 +13,7 @@ elseif(VCPKG_TARGET_ARCHITECTURE STREQUAL "x86") - string(APPEND VCPKG_LINKER_FLAGS " -m32") - elseif(VCPKG_TARGET_ARCHITECTURE STREQUAL "arm") - set(CMAKE_SYSTEM_PROCESSOR armv7l CACHE STRING "") -- if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") -+ if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL 
"x86_64") - if(NOT DEFINED CMAKE_CXX_COMPILER) - set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") - endif() -@@ -30,7 +30,7 @@ elseif(VCPKG_TARGET_ARCHITECTURE STREQUAL "arm") - endif() - elseif(VCPKG_TARGET_ARCHITECTURE STREQUAL "arm64") - set(CMAKE_SYSTEM_PROCESSOR aarch64 CACHE STRING "") -- if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux") -+ if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") - if(NOT DEFINED CMAKE_CXX_COMPILER) - set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") - endif() From ea121c35c49fa13be2b7877b2c5ee0444853fa44 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:09:35 -0700 Subject: [PATCH 02/27] Bump pypa/cibuildwheel from 2.20.0 to 2.21.0 (#471) --- .circleci/config.yml | 2 +- .github/workflows/release.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index af135db3..498fdf06 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,7 +20,7 @@ jobs: - run: name: Build the Linux aarch64 wheels. 
command: | - python3 -m pip install --user cibuildwheel==2.20.0 + python3 -m pip install --user cibuildwheel==2.21.0 python3 -m cibuildwheel --output-dir wheelhouse - run: name: Test the wheels diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56324590..8da37f01 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -137,7 +137,7 @@ jobs: BUILDKIT_PROGRESS: plain - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.21.0 - uses: actions/upload-artifact@v4 with: @@ -220,7 +220,7 @@ jobs: path: ${{ matrix.vcpkg_logs }} - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.21.0 env: # CIBW needs to know triplet for the correct install path VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} From 262e925a5e7273f0a6b306a5f3429c527c0f5a56 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 16 Sep 2024 18:11:21 +0200 Subject: [PATCH 03/27] MAINT: move generic project settings to pyproject.toml (#469) --- pyproject.toml | 36 ++++++++++++++++++++++++++++++++++-- setup.py | 16 ---------------- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 53084902..fb09a0f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,9 +6,42 @@ requires = [ # tomli is used by versioneer "tomli; python_version < '3.11'", ] +build-backend = "setuptools.build_meta" + +[project] +name = "pyogrio" +dynamic = ["version"] +authors = [ + { name = "Brendan C. 
Ward", email = "bcward@astutespruce.com" }, + { name = "pyogrio contributors" } +] +maintainers = [{ name = "pyogrio contributors" }] +license = { file = "LICENSE" } +description = "Vectorized spatial vector file format I/O using GDAL/OGR" +readme = "README.md" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: GIS", +] +requires-python = ">=3.8" +dependencies = ["certifi", "numpy", "packaging"] + +[project.optional-dependencies] +dev = ["cython"] +test = ["pytest", "pytest-cov"] +benchmark = ["pytest-benchmark"] +geopandas = ["geopandas"] + +[project.urls] +Home = "https://pyogrio.readthedocs.io/" +Repository = "https://github.com/geopandas/pyogrio" [tool.cibuildwheel] -skip = ["cp36-*", "cp37-*", "pp*", "*musllinux*"] +skip = ["pp*", "*musllinux*"] archs = ["auto64"] manylinux-x86_64-image = "manylinux-vcpkg-gdal:latest" manylinux-aarch64-image = "manylinux-aarch64-vcpkg-gdal:latest" @@ -63,7 +96,6 @@ tag_prefix = "v" [tool.ruff] line-length = 88 extend-exclude = ["doc/*", "benchmarks/*", "pyogrio/_version.py"] -target-version = "py38" [tool.ruff.lint] select = [ diff --git a/setup.py b/setup.py index 99833162..25699c09 100644 --- a/setup.py +++ b/setup.py @@ -202,24 +202,8 @@ def get_gdal_config(): cmdclass["build_ext"] = build_ext setup( - name="pyogrio", version=version, packages=find_packages(), - url="https://github.com/geopandas/pyogrio", - license="MIT", - author="Brendan C. 
Ward", - author_email="bcward@astutespruce.com", - description="Vectorized spatial vector file format I/O using GDAL/OGR", - long_description_content_type="text/markdown", - long_description=open("README.md").read(), - python_requires=">=3.8", - install_requires=["certifi", "numpy", "packaging"], - extras_require={ - "dev": ["Cython"], - "test": ["pytest", "pytest-cov"], - "benchmark": ["pytest-benchmark"], - "geopandas": ["geopandas"], - }, include_package_data=True, exclude_package_data={'': ['*.h', '_*.pxd', '_*.pyx']}, cmdclass=cmdclass, From 096e85f32a2759ebe5286192ed814e6a9bd6aa31 Mon Sep 17 00:00:00 2001 From: Brendan Ward Date: Tue, 17 Sep 2024 09:31:04 -0700 Subject: [PATCH 04/27] DOC: Add install note re: advanced drivers on conda-forge (#474) --- docs/source/install.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/source/install.md b/docs/source/install.md index 04b1853e..5419a36d 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -22,7 +22,15 @@ conda install -c conda-forge pyogrio This requires compatible versions of `GDAL` and `numpy` from `conda-forge` for raw I/O support and `geopandas` and their dependencies for GeoDataFrame -I/O support. +I/O support. By default, the `GDAL` package on conda-forge already supports a +wide range of vector formats. If needed, you can install additional drivers by +installing the associated +[conda-forge package](https://gdal.org/en/latest/download.html#conda). The +following packages are currently available to install extra vector drivers: + +- `libgdal-arrow-parquet` ((Geo)Parquet and (Geo)Arrow IPC) +- `libgdal-pg` (PostgreSQL / PostGIS) +- `libgdal-xls` (XLS - MS Excel format) ### PyPI @@ -40,6 +48,11 @@ most likely due to the installation process falling back to installing from the source distribution because the available wheels are not compatible with your platform. 
+The binary wheels available on PyPI include the core GDAL drivers (GeoJSON, +ESRI Shapefile, GPKG, FGB, OpenFileGDB, etc) but do not include more advanced +drivers such as LIBKML and Spatialite. If you need such drivers, we recommend +that you use conda-forge to install pyogrio as explained above. + ### Troubleshooting installation errors If you install GeoPandas or Fiona using `pip`, you may encounter issues related From 0441e2c8cdf2d7413ea40e405cf2e45aa6f99f52 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 17 Sep 2024 18:31:58 +0200 Subject: [PATCH 05/27] BLD: test the Python 3.13 wheels (#475) --- .github/workflows/release.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8da37f01..a0f27b22 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -247,7 +247,7 @@ jobs: "macos-12", "macos-latest", ] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] include: - os: "ubuntu-latest" artifact: pyogrio-wheel-linux-manylinux2014_x86_64 @@ -302,9 +302,13 @@ jobs: - name: Install dependencies and pyogrio wheel shell: bash run: | - uv pip install -r ci/requirements-wheel-test.txt + if [ ${{ matrix.python-version }} != "3.13" ]; then + uv pip install -r ci/requirements-wheel-test.txt + else + uv pip install pytest numpy certifi packaging + fi uv pip install --no-cache --pre --no-index --find-links wheelhouse pyogrio - if [ ${{ matrix.python-version }} != "3.12" ]; then + if [ ${{ matrix.python-version }} != "3.13" ]; then uv pip install --no-deps geopandas fi uv pip list From 4a3f4dab7764e20d4f6812518075515380c1ad9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 24 Sep 2024 08:25:52 -0700 Subject: [PATCH 06/27] Bump pypa/cibuildwheel from 2.21.0 to 2.21.1 (#476) --- .github/workflows/release.yml | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a0f27b22..238b291d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -137,7 +137,7 @@ jobs: BUILDKIT_PROGRESS: plain - name: Build wheels - uses: pypa/cibuildwheel@v2.21.0 + uses: pypa/cibuildwheel@v2.21.1 - uses: actions/upload-artifact@v4 with: @@ -220,7 +220,7 @@ jobs: path: ${{ matrix.vcpkg_logs }} - name: Build wheels - uses: pypa/cibuildwheel@v2.21.0 + uses: pypa/cibuildwheel@v2.21.1 env: # CIBW needs to know triplet for the correct install path VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} From 55438912713030b71fc79d5c04bca420d4db0e2c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 Sep 2024 19:38:16 +0200 Subject: [PATCH 07/27] MAINT: drop Python 3.8 support (#473) --- .github/workflows/docker-gdal.yml | 13 +++++++++++-- .github/workflows/release.yml | 2 +- CHANGES.md | 2 ++ README.md | 2 +- docs/source/install.md | 2 +- pyogrio/_env.py | 3 +-- pyproject.toml | 2 +- setup.py | 4 ++-- 8 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docker-gdal.yml b/.github/workflows/docker-gdal.yml index 189954a0..19fe609e 100644 --- a/.github/workflows/docker-gdal.yml +++ b/.github/workflows/docker-gdal.yml @@ -25,8 +25,8 @@ jobs: - "ghcr.io/osgeo/gdal:ubuntu-small-3.8.5" # python 3.10.12 - "ghcr.io/osgeo/gdal:ubuntu-small-3.7.3" # python 3.10.12 - "ghcr.io/osgeo/gdal:ubuntu-small-3.6.4" # python 3.10.6 - - "osgeo/gdal:ubuntu-small-3.5.3" # python 3.8.10 - - "osgeo/gdal:ubuntu-small-3.4.3" # python 3.8.10 + - "osgeo/gdal:ubuntu-small-3.5.3" # python 3.9.20 (installed manually) + - "osgeo/gdal:ubuntu-small-3.4.3" # python 3.9.20 (installed manually) container: image: ${{ matrix.container }} @@ -36,6 +36,15 @@ jobs: run: | apt-get update && apt-get install -y build-essential git python3-dev + - name: Install Python + # the GDAL 3.4 and 3.5 images do have Python 
3.8 installed, so have to + # install a more recent Python version manually + if: matrix.container == 'osgeo/gdal:ubuntu-small-3.5.3' || matrix.container == 'osgeo/gdal:ubuntu-small-3.4.3' + run: | + apt-get update && apt-get install -y software-properties-common + add-apt-repository -y ppa:deadsnakes/ppa + apt-get update && apt-get install -y python3.9-dev + - uses: actions/checkout@v4 - name: Create virtual environment diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 238b291d..f1ab3dec 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -247,7 +247,7 @@ jobs: "macos-12", "macos-latest", ] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] include: - os: "ubuntu-latest" artifact: pyogrio-wheel-linux-manylinux2014_x86_64 diff --git a/CHANGES.md b/CHANGES.md index 242cf518..02eb6087 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -18,6 +18,8 @@ This also leads to `pyproj` becoming an optional dependency; you will need to install `pyproj` in order to support spatial reference systems (#452). - The GDAL library included in the wheels is updated from 3.8.5 to GDAL 3.9.2 (#466). +- pyogrio now requires a minimum version of Python >= 3.9 (#473). +- Wheels are now available for Python 3.13. ## 0.9.0 (2024-06-17) diff --git a/README.md b/README.md index 0b02463d..8b9f8868 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ substantial change. Please see [CHANGES](CHANGES.md). ## Requirements -Supports Python 3.8 - 3.11 and GDAL 3.4.x - 3.8.x. +Supports Python 3.9 - 3.13 and GDAL 3.4.x - 3.9.x. Reading to GeoDataFrames requires `geopandas>=0.12` with `shapely>=2`. 
diff --git a/docs/source/install.md b/docs/source/install.md index 5419a36d..94513d11 100644 --- a/docs/source/install.md +++ b/docs/source/install.md @@ -2,7 +2,7 @@ ## Requirements -Supports Python 3.8 - 3.11 and GDAL 3.4.x - 3.8.x +Supports Python 3.9 - 3.13 and GDAL 3.4.x - 3.9.x Reading to GeoDataFrames requires `geopandas>=0.12` with `shapely>=2`. diff --git a/pyogrio/_env.py b/pyogrio/_env.py index ee15883c..76e8e4b6 100644 --- a/pyogrio/_env.py +++ b/pyogrio/_env.py @@ -7,7 +7,6 @@ import logging import os import platform -import sys from contextlib import contextmanager from pathlib import Path @@ -28,7 +27,7 @@ gdal_dll_dir = None -if platform.system() == "Windows" and sys.version_info >= (3, 8): +if platform.system() == "Windows": # if loading of extension modules fails, search for gdal dll directory try: import pyogrio._io # noqa: F401 diff --git a/pyproject.toml b/pyproject.toml index fb09a0f0..26617a79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: GIS", ] -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = ["certifi", "numpy", "packaging"] [project.optional-dependencies] diff --git a/setup.py b/setup.py index 25699c09..4acb7677 100644 --- a/setup.py +++ b/setup.py @@ -20,12 +20,12 @@ logger = logging.getLogger(__name__) -MIN_PYTHON_VERSION = (3, 8, 0) +MIN_PYTHON_VERSION = (3, 9, 0) MIN_GDAL_VERSION = (2, 4, 0) if sys.version_info < MIN_PYTHON_VERSION: - raise RuntimeError("Python >= 3.8 is required") + raise RuntimeError("Python >= 3.9 is required") def copy_data_tree(datadir, destdir): From 52c33f306b332ef52b6b9803ffa68586f68872b2 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 27 Sep 2024 17:49:42 +0200 Subject: [PATCH 08/27] ENH: add support to read, write, list and remove `/vsimem/` files (#457) --- CHANGES.md | 4 + docs/source/api.rst | 2 +- environment-dev.yml | 4 +- pyogrio/__init__.py | 10 +- 
pyogrio/_io.pyx | 91 +++++++------ pyogrio/_ogr.pxd | 10 +- pyogrio/_vsi.pxd | 4 +- pyogrio/_vsi.pyx | 207 +++++++++++++++++++++++++---- pyogrio/core.py | 58 ++++++++ pyogrio/raw.py | 2 +- pyogrio/tests/conftest.py | 28 +++- pyogrio/tests/test_arrow.py | 20 +++ pyogrio/tests/test_core.py | 147 ++++++++++++++------ pyogrio/tests/test_geopandas_io.py | 53 +++++++- pyogrio/tests/test_path.py | 29 ++-- pyogrio/tests/test_util.py | 56 ++++++++ pyogrio/util.py | 48 +++++-- 17 files changed, 624 insertions(+), 149 deletions(-) create mode 100644 pyogrio/tests/test_util.py diff --git a/CHANGES.md b/CHANGES.md index 02eb6087..66c15f07 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,10 @@ ## 0.10.0 (yyyy-mm-dd) +### Improvements + +- Add support to read, write, list, and remove `/vsimem/` files (#457) + ### Bug fixes - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). diff --git a/docs/source/api.rst b/docs/source/api.rst index 007470d7..105fbb3f 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -5,7 +5,7 @@ Core ---- .. 
automodule:: pyogrio - :members: list_drivers, detect_write_driver, list_layers, read_bounds, read_info, set_gdal_config_options, get_gdal_config_option, __gdal_version__, __gdal_version_string__ + :members: list_drivers, detect_write_driver, list_layers, read_bounds, read_info, set_gdal_config_options, get_gdal_config_option, vsi_listtree, vsi_rmtree, vsi_unlink, __gdal_version__, __gdal_version_string__ GeoPandas integration --------------------- diff --git a/environment-dev.yml b/environment-dev.yml index c6de8e69..d92ac999 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -3,13 +3,13 @@ channels: - conda-forge dependencies: # Required - - numpy - libgdal-core + - numpy - shapely>=2 # Optional - geopandas-base - - pyproj - pyarrow + - pyproj # Specific for dev - cython - pre-commit diff --git a/pyogrio/__init__.py b/pyogrio/__init__.py index a5f32511..c5450c6e 100644 --- a/pyogrio/__init__.py +++ b/pyogrio/__init__.py @@ -21,6 +21,9 @@ read_bounds, read_info, set_gdal_config_options, + vsi_listtree, + vsi_rmtree, + vsi_unlink, ) from pyogrio.geopandas import read_dataframe, write_dataframe from pyogrio.raw import open_arrow, read_arrow, write_arrow @@ -37,10 +40,13 @@ "set_gdal_config_options", "get_gdal_config_option", "get_gdal_data_path", - "read_arrow", "open_arrow", - "write_arrow", + "read_arrow", "read_dataframe", + "vsi_listtree", + "vsi_rmtree", + "vsi_unlink", + "write_arrow", "write_dataframe", "__gdal_version__", "__gdal_version_string__", diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 7c6427ce..a9c934e5 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -12,6 +12,7 @@ import math import os import sys import warnings +from pathlib import Path from libc.stdint cimport uint8_t, uintptr_t from libc.stdlib cimport malloc, free @@ -1184,7 +1185,7 @@ def ogr_read( ): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL 
cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1224,7 +1225,7 @@ def ogr_read( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: # for shapefiles, SHAPE_ENCODING must be set before opening the file @@ -1362,8 +1363,8 @@ def ogr_read( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_vsimem: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) return ( meta, @@ -1424,7 +1425,7 @@ def ogr_open_arrow( ): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef const char *where_c = NULL @@ -1480,7 +1481,7 @@ def ogr_open_arrow( reader = None try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1679,8 +1680,8 @@ def ogr_open_arrow( CPLFree(prev_shape_encoding) prev_shape_encoding = NULL - if is_vsimem: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) def ogr_read_bounds( @@ -1697,7 +1698,7 @@ def ogr_read_bounds( object mask=None): cdef int err = 0 - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef const char *where_c = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1715,7 +1716,7 @@ def ogr_read_bounds( raise ValueError("'max_features' must be >= 0") try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) if layer is 
None: @@ -1744,8 +1745,8 @@ def ogr_read_bounds( GDALClose(ogr_dataset) ogr_dataset = NULL - if is_vsimem: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) return bounds @@ -1758,7 +1759,7 @@ def ogr_read_info( int force_feature_count=False, int force_total_bounds=False): - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef char **dataset_options = NULL cdef OGRDataSourceH ogr_dataset = NULL @@ -1767,7 +1768,7 @@ def ogr_read_info( cdef bint override_shape_encoding = False try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer if encoding: override_shape_encoding = True @@ -1826,19 +1827,19 @@ def ogr_read_info( if prev_shape_encoding != NULL: CPLFree(prev_shape_encoding) - if is_vsimem: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) return meta def ogr_list_layers(object path_or_buffer): - cdef bint is_vsimem = isinstance(path_or_buffer, bytes) + cdef bint use_tmp_vsimem = isinstance(path_or_buffer, bytes) cdef const char *path_c = NULL cdef OGRDataSourceH ogr_dataset = NULL try: - path = read_buffer_to_vsimem(path_or_buffer) if is_vsimem else path_or_buffer + path = read_buffer_to_vsimem(path_or_buffer) if use_tmp_vsimem else path_or_buffer ogr_dataset = ogr_open(path.encode('UTF-8'), 0, NULL) layers = get_layer_names(ogr_dataset) @@ -1847,8 +1848,8 @@ def ogr_list_layers(object path_or_buffer): GDALClose(ogr_dataset) ogr_dataset = NULL - if is_vsimem: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) return layers @@ -1931,6 +1932,16 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) except CPLE_BaseError as exc: raise DataSourceError(str(exc)) + # For /vsimem/ files, with GDAL >= 3.8 parent directories are created 
automatically. + IF CTE_GDAL_VERSION < (3, 8, 0): + path = path_c.decode("UTF-8") + if "/vsimem/" in path: + parent = str(Path(path).parent.as_posix()) + if not parent.endswith("/vsimem"): + retcode = VSIMkdirRecursive(parent.encode("UTF-8"), 0666) + if retcode != 0: + raise OSError(f"Could not create parent directory '{parent}'") + # Create the dataset try: ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options)) @@ -2014,7 +2025,7 @@ cdef infer_field_types(list dtypes): cdef create_ogr_dataset_layer( str path, - bint is_vsi, + bint use_tmp_vsimem, str layer, str driver, str crs, @@ -2048,6 +2059,8 @@ cdef create_ogr_dataset_layer( encoding : str Only used if `driver` is "ESRI Shapefile". If not None, it overrules the default shapefile encoding, which is "UTF-8" in pyogrio. + use_tmp_vsimem : bool + Whether the file path is meant to save a temporary memory file to. Returns ------- @@ -2075,8 +2088,8 @@ cdef create_ogr_dataset_layer( driver_b = driver.encode('UTF-8') driver_c = driver_b - # in-memory dataset is always created from scratch - path_exists = os.path.exists(path) if not is_vsi else False + # temporary in-memory dataset is always created from scratch + path_exists = os.path.exists(path) if not use_tmp_vsimem else False if not layer: layer = os.path.splitext(os.path.split(path)[1])[0] @@ -2112,10 +2125,7 @@ cdef create_ogr_dataset_layer( raise exc # otherwise create from scratch - if is_vsi: - VSIUnlink(path_c) - else: - os.unlink(path) + os.unlink(path) ogr_dataset = NULL @@ -2250,7 +2260,7 @@ def ogr_write( cdef int num_records = -1 cdef int num_field_data = len(field_data) if field_data is not None else 0 cdef int num_fields = len(fields) if fields is not None else 0 - cdef bint is_vsi = False + cdef bint use_tmp_vsimem = False if num_fields != num_field_data: raise ValueError("field_data array needs to be same length as fields array") @@ -2291,12 +2301,11 @@ def ogr_write( try: # Setup in-memory handler if needed - 
path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_vsi = path.startswith('/vsimem/') + path, use_tmp_vsimem = get_ogr_vsimem_write_path(path_or_fp, driver) # Setup dataset and layer layer_created = create_ogr_dataset_layer( - path, is_vsi, layer, driver, crs, geometry_type, encoding, + path, use_tmp_vsimem, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2501,7 +2510,7 @@ def ogr_write( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_vsi: + if use_tmp_vsimem: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2523,8 +2532,8 @@ def ogr_write( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_vsi: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) def ogr_write_arrow( @@ -2548,7 +2557,7 @@ def ogr_write_arrow( cdef OGRDataSourceH ogr_dataset = NULL cdef OGRLayerH ogr_layer = NULL cdef char **options = NULL - cdef bint is_vsi = False + cdef bint use_tmp_vsimem = False cdef ArrowArrayStream* stream = NULL cdef ArrowSchema schema cdef ArrowArray array @@ -2557,11 +2566,11 @@ def ogr_write_arrow( array.release = NULL try: - path = get_ogr_vsimem_write_path(path_or_fp, driver) - is_vsi = path.startswith('/vsimem/') + # Setup in-memory handler if needed + path, use_tmp_vsimem = get_ogr_vsimem_write_path(path_or_fp, driver) layer_created = create_ogr_dataset_layer( - path, is_vsi, layer, driver, crs, geometry_type, encoding, + path, use_tmp_vsimem, layer, driver, crs, geometry_type, encoding, dataset_kwargs, layer_kwargs, append, dataset_metadata, layer_metadata, &ogr_dataset, &ogr_layer, @@ -2622,7 +2631,7 @@ def ogr_write_arrow( raise DataSourceError(f"Failed to write features to dataset {path}; {exc}") # copy in-memory file back to path_or_fp object - if is_vsi: + if use_tmp_vsimem: read_vsimem_to_buffer(path, path_or_fp) finally: @@ -2642,8 +2651,8 
@@ def ogr_write_arrow( if ogr_dataset != NULL: ogr_close(ogr_dataset) - if is_vsi: - delete_vsimem_file(path) + if use_tmp_vsimem: + vsimem_rmtree_toplevel(path) cdef get_arrow_extension_metadata(const ArrowSchema* schema): diff --git a/pyogrio/_ogr.pxd b/pyogrio/_ogr.pxd index 9369ba71..8ce6a578 100644 --- a/pyogrio/_ogr.pxd +++ b/pyogrio/_ogr.pxd @@ -36,6 +36,10 @@ cdef extern from "cpl_error.h" nogil: void CPLPopErrorHandler() +cdef extern from "cpl_port.h": + ctypedef char **CSLConstList + + cdef extern from "cpl_string.h": char** CSLAddNameValue(char **list, const char *name, const char *value) char** CSLSetNameValue(char **list, const char *name, const char *value) @@ -53,6 +57,9 @@ cdef extern from "cpl_vsi.h" nogil: long st_mode int st_mtime + int VSIStatL(const char *path, VSIStatBufL *psStatBuf) + int VSI_ISDIR(int mode) + char** VSIReadDirRecursive(const char *path) int VSIFCloseL(VSILFILE *fp) int VSIFFlushL(VSILFILE *fp) int VSIUnlink(const char *path) @@ -61,7 +68,8 @@ cdef extern from "cpl_vsi.h" nogil: unsigned char *VSIGetMemFileBuffer(const char *path, vsi_l_offset *data_len, int take_ownership) int VSIMkdir(const char *path, long mode) - int VSIRmdirRecursive(const char *pszDirname) + int VSIMkdirRecursive(const char *path, long mode) + int VSIRmdirRecursive(const char *path) cdef extern from "ogr_core.h": diff --git a/pyogrio/_vsi.pxd b/pyogrio/_vsi.pxd index afa2633a..1c464489 100644 --- a/pyogrio/_vsi.pxd +++ b/pyogrio/_vsi.pxd @@ -1,4 +1,4 @@ -cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver) +cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver) cdef str read_buffer_to_vsimem(bytes bytes_buffer) cdef read_vsimem_to_buffer(str path, object out_buffer) -cdef delete_vsimem_file(str path) \ No newline at end of file +cpdef vsimem_rmtree_toplevel(str path) \ No newline at end of file diff --git a/pyogrio/_vsi.pyx b/pyogrio/_vsi.pyx index 47b8c11d..757c2c78 100644 --- a/pyogrio/_vsi.pyx +++ b/pyogrio/_vsi.pyx @@ -1,3 
+1,4 @@ +import fnmatch from io import BytesIO from uuid import uuid4 @@ -8,28 +9,44 @@ from pyogrio._ogr cimport * from pyogrio._ogr import _get_driver_metadata_item -cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): - """ Return the original path or a /vsimem/ path +cdef tuple get_ogr_vsimem_write_path(object path_or_fp, str driver): + """Return the path to write to and whether it is a tmp vsimem filepath. - If passed a io.BytesIO object, this will return a /vsimem/ path that can be - used to create a new in-memory file with an extension inferred from the driver - if possible. Path will be contained in an in-memory directory to contain - sibling files (though drivers that create sibling files are not supported for - in-memory files). + If passed a io.BytesIO object to write to, a temporary vsimem file will be + used to write the data directly to memory. + Hence, a tuple will be returned with a /vsimem/ path and True to indicate + the path will be to a tmp vsimem file. + The path will have an extension inferred from the driver if possible. Path + will be contained in an in-memory directory to contain sibling files + (though drivers that create sibling files are not supported for in-memory + files). - Caller is responsible for deleting the directory via delete_vsimem_file() + Caller is responsible for deleting the directory via + vsimem_rmtree_toplevel(). Parameters ---------- path_or_fp : str or io.BytesIO object driver : str - """ + Returns + ------- + tuple of (path, use_tmp_vsimem) + Tuple of the path to write to and a bool indicating if the path is a + temporary vsimem filepath. 
+ + """ + # The write path is not a BytesIO object, so return path as-is if not isinstance(path_or_fp, BytesIO): - return path_or_fp + return (path_or_fp, False) - # Create in-memory directory to contain auxiliary files - memfilename = uuid4().hex + # Check for existing bytes + if path_or_fp.getbuffer().nbytes > 0: + raise NotImplementedError("writing to existing in-memory object is not supported") + + # Create in-memory directory to contain auxiliary files. + # Prefix with "pyogrio_" so it is clear the directory was created by pyogrio. + memfilename = f"pyogrio_{uuid4().hex}" VSIMkdir(f"/vsimem/{memfilename}".encode("UTF-8"), 0666) # file extension is required for some drivers, set it based on driver metadata @@ -40,11 +57,7 @@ cdef str get_ogr_vsimem_write_path(object path_or_fp, str driver): path = f"/vsimem/{memfilename}/{memfilename}{ext}" - # check for existing bytes - if path_or_fp.getbuffer().nbytes > 0: - raise NotImplementedError("writing to existing in-memory object is not supported") - - return path + return (path, True) cdef str read_buffer_to_vsimem(bytes bytes_buffer): @@ -54,7 +67,8 @@ cdef str read_buffer_to_vsimem(bytes bytes_buffer): will be prefixed with /vsizip/ and suffixed with .zip to enable proper reading by GDAL. - Caller is responsible for deleting the in-memory file via delete_vsimem_file(). + Caller is responsible for deleting the in-memory file via + vsimem_rmtree_toplevel(). Parameters ---------- @@ -65,12 +79,15 @@ cdef str read_buffer_to_vsimem(bytes bytes_buffer): is_zipped = len(bytes_buffer) > 4 and bytes_buffer[:4].startswith(b"PK\x03\x04") ext = ".zip" if is_zipped else "" - path = f"/vsimem/{uuid4().hex}{ext}" + # Prefix with "pyogrio_" so it is clear the file was created by pyogrio. 
+ path = f"/vsimem/pyogrio_{uuid4().hex}{ext}" # Create an in-memory object that references bytes_buffer # NOTE: GDAL does not copy the contents of bytes_buffer; it must remain # in scope through the duration of using this file - vsi_handle = VSIFileFromMemBuffer(path.encode("UTF-8"), bytes_buffer, num_bytes, 0) + vsi_handle = VSIFileFromMemBuffer( + path.encode("UTF-8"), bytes_buffer, num_bytes, 0 + ) if vsi_handle == NULL: raise OSError("failed to read buffer into in-memory file") @@ -88,8 +105,8 @@ cdef read_vsimem_to_buffer(str path, object out_buffer): """Copy bytes from in-memory file to buffer This will automatically unlink the in-memory file pointed to by path; caller - is still responsible for calling delete_vsimem_file() to cleanup any other - files contained in the in-memory directory. + is still responsible for calling vsimem_rmtree_toplevel() to cleanup any + other files contained in the in-memory directory. Parameters: ----------- @@ -118,23 +135,155 @@ cdef read_vsimem_to_buffer(str path, object out_buffer): CPLFree(vsi_buffer) -cdef delete_vsimem_file(str path): - """ Recursively delete in-memory path or directory containing path +cpdef vsimem_rmtree_toplevel(str path): + """Remove the top-level file or top-level directory containing the file. + + This is used for final cleanup of an in-memory dataset. The path can point + to either: + - a top-level file (directly in /vsimem/). + - a file in a directory, which may include sibling files. + - a zip file (reported as a directory by VSI_ISDIR). - This is used for final cleanup of an in-memory dataset, which may have been - created within a directory to contain sibling files. + Except for the first case, the top-level directory (direct subdirectory of + /vsimem/) will be determined and will be removed recursively. Additional VSI handlers may be chained to the left of /vsimem/ in path and will be ignored. 
+ Even though it is only meant for "internal use", the function is declared + as cpdef, so it can be called from tests as well. + Parameters: ----------- path : str path to in-memory file + """ + cdef VSIStatBufL st_buf if "/vsimem/" not in path: - return + raise ValueError(f"Path is not a /vsimem/ path: '{path}'") + + # Determine the top-level directory of the file + mempath_parts = path.split("/vsimem/")[1].split("/") + if len(mempath_parts) == 0: + raise OSError("path to in-memory file or directory is required") + + toplevel_path = f"/vsimem/{mempath_parts[0]}" + + if not VSIStatL(toplevel_path.encode("UTF-8"), &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + + if VSI_ISDIR(st_buf.st_mode): + errcode = VSIRmdirRecursive(toplevel_path.encode("UTF-8")) + else: + errcode = VSIUnlink(toplevel_path.encode("UTF-8")) + + if errcode != 0: + raise OSError(f"Error removing '{path}': {errcode=}") + + +def ogr_vsi_listtree(str path, str pattern): + """Recursively list the contents in a VSI directory. + + An fnmatch pattern can be specified to filter the directories/files + returned. + + Parameters: + ----------- + path : str + Path to the VSI directory to be listed. + pattern : str + Pattern to filter results, in fnmatch format. 
+ + """ + cdef const char *path_c + cdef int n + cdef char** papszFiles + cdef VSIStatBufL st_buf - root = "/vsimem/" + path.split("/vsimem/")[1].split("/")[0] - VSIRmdirRecursive(root.encode("UTF-8")) + path_b = path.encode("UTF-8") + path_c = path_b + + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if not VSI_ISDIR(st_buf.st_mode): + raise NotADirectoryError(f"Path is not a directory: '{path}'") + + try: + papszFiles = VSIReadDirRecursive(path_c) + n = CSLCount(papszFiles) + files = [] + for i in range(n): + files.append(papszFiles[i].decode("UTF-8")) + finally: + CSLDestroy(papszFiles) + + # Apply filter pattern + if pattern is not None: + files = fnmatch.filter(files, pattern) + + # Prepend files with the base path + if not path.endswith("/"): + path = f"{path}/" + files = [f"{path}{file}" for file in files] + + return files + + +def ogr_vsi_rmtree(str path): + """Recursively remove VSI directory. + + Parameters: + ----------- + path : str + path to the VSI directory to be removed. + + """ + cdef const char *path_c + cdef VSIStatBufL st_buf + + try: + path_b = path.encode("UTF-8") + except UnicodeDecodeError: + path_b = path + path_c = path_b + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + if not VSI_ISDIR(st_buf.st_mode): + raise NotADirectoryError(f"Path is not a directory: '{path}'") + if path.endswith("/vsimem") or path.endswith("/vsimem/"): + raise OSError("path to in-memory file or directory is required") + + errcode = VSIRmdirRecursive(path_c) + if errcode != 0: + raise OSError(f"Error in rmtree of '{path}': {errcode=}") + + +def ogr_vsi_unlink(str path): + """Remove VSI file. + + Parameters: + ----------- + path : str + path to the VSI file to be removed. 
+ + """ + cdef const char *path_c + cdef VSIStatBufL st_buf + + try: + path_b = path.encode("UTF-8") + except UnicodeDecodeError: + path_b = path + path_c = path_b + + if not VSIStatL(path_c, &st_buf) == 0: + raise FileNotFoundError(f"Path does not exist: '{path}'") + + if VSI_ISDIR(st_buf.st_mode): + raise IsADirectoryError(f"Path is a directory: '{path}'") + + errcode = VSIUnlink(path_c) + if errcode != 0: + raise OSError(f"Error removing '{path}': {errcode=}") diff --git a/pyogrio/core.py b/pyogrio/core.py index add4725f..1fa18fa4 100644 --- a/pyogrio/core.py +++ b/pyogrio/core.py @@ -1,5 +1,8 @@ """Core functions to interact with OGR data sources.""" +from pathlib import Path +from typing import Optional, Union + from pyogrio._env import GDALEnv from pyogrio.util import ( _mask_to_wkb, @@ -23,6 +26,11 @@ ogr_list_drivers, set_gdal_config_options as _set_gdal_config_options, ) + from pyogrio._vsi import ( + ogr_vsi_listtree, + ogr_vsi_rmtree, + ogr_vsi_unlink, + ) _init_gdal_data() _init_proj_data() @@ -326,3 +334,53 @@ def get_gdal_data_path(): """ return _get_gdal_data_path() + + +def vsi_listtree(path: Union[str, Path], pattern: Optional[str] = None): + """Recursively list the contents of a VSI directory. + + An fnmatch pattern can be specified to filter the directories/files + returned. + + Parameters + ---------- + path : str or pathlib.Path + Path to the VSI directory to be listed. + pattern : str, optional + Pattern to filter results, in fnmatch format. + + """ + if isinstance(path, Path): + path = path.as_posix() + + return ogr_vsi_listtree(path, pattern=pattern) + + +def vsi_rmtree(path: Union[str, Path]): + """Recursively remove VSI directory. + + Parameters + ---------- + path : str or pathlib.Path + path to the VSI directory to be removed. + + """ + if isinstance(path, Path): + path = path.as_posix() + + ogr_vsi_rmtree(path) + + +def vsi_unlink(path: Union[str, Path]): + """Remove a VSI file. 
+ + Parameters + ---------- + path : str or pathlib.Path + path to vsimem file to be removed + + """ + if isinstance(path, Path): + path = path.as_posix() + + ogr_vsi_unlink(path) diff --git a/pyogrio/raw.py b/pyogrio/raw.py index aaac0285..0f0c3063 100644 --- a/pyogrio/raw.py +++ b/pyogrio/raw.py @@ -563,7 +563,7 @@ def _get_write_path_driver(path, driver, append=False): ) else: - path = vsi_path(str(path)) + path = vsi_path(path) if driver is None: driver = detect_write_driver(path) diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index 262bc1a3..d6bea86b 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -17,6 +17,7 @@ HAS_PYPROJ, HAS_SHAPELY, ) +from pyogrio.core import vsi_rmtree from pyogrio.raw import read, write import pytest @@ -38,6 +39,15 @@ ALL_EXTS = [".fgb", ".geojson", ".geojsonl", ".gpkg", ".shp"] +START_FID = { + ".fgb": 0, + ".geojson": 0, + ".geojsonl": 0, + ".geojsons": 0, + ".gpkg": 1, + ".shp": 0, +} + def pytest_report_header(config): drivers = ", ".join( @@ -116,7 +126,7 @@ def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request): @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): - """Wrap naturalearth_lowres as a zip file for vsi tests""" + """Wrap naturalearth_lowres as a zip file for VSI tests""" path = tmp_path / f"{naturalearth_lowres.name}.zip" with ZipFile(path, mode="w", compression=ZIP_DEFLATED, compresslevel=5) as out: @@ -127,6 +137,22 @@ def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): return path, f"/vsizip/{path}/{naturalearth_lowres.name}" +@pytest.fixture(scope="function") +def naturalearth_lowres_vsimem(naturalearth_lowres): + """Write naturalearth_lowres to a vsimem file for VSI tests""" + + meta, _, geometry, field_data = read(naturalearth_lowres) + name = f"pyogrio_fixture_{naturalearth_lowres.stem}" + dst_path = Path(f"/vsimem/{name}/{name}.gpkg") + meta["spatial_index"] = False + meta["geometry_type"] = 
"MultiPolygon" + + write(dst_path, geometry, field_data, layer="naturalearth_lowres", **meta) + yield dst_path + + vsi_rmtree(dst_path.parent) + + @pytest.fixture(scope="session") def line_zm_file(): return _data_dir / "line_zm.gpkg" diff --git a/pyogrio/tests/test_arrow.py b/pyogrio/tests/test_arrow.py index 7b2d6673..0a89a92a 100644 --- a/pyogrio/tests/test_arrow.py +++ b/pyogrio/tests/test_arrow.py @@ -17,6 +17,7 @@ read_dataframe, read_info, set_gdal_config_options, + vsi_listtree, ) from pyogrio.errors import DataLayerError, DataSourceError, FieldError from pyogrio.raw import open_arrow, read_arrow, write, write_arrow @@ -162,6 +163,10 @@ def test_read_arrow_vsi(naturalearth_lowres_vsi): table = read_arrow(naturalearth_lowres_vsi[1])[1] assert len(table) == 177 + # Check temp file was cleaned up. Filter to files created by pyogrio, as GDAL keeps + # cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_bytes(geojson_bytes): meta, table = read_arrow(geojson_bytes) @@ -169,12 +174,18 @@ def test_read_arrow_bytes(geojson_bytes): assert meta["fields"].shape == (5,) assert len(table) == 3 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_nonseekable_bytes(nonseekable_bytes): meta, table = read_arrow(nonseekable_bytes) assert meta["fields"].shape == (0,) assert len(table) == 1 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_read_arrow_filelike(geojson_filelike): meta, table = read_arrow(geojson_filelike) @@ -182,6 +193,9 @@ def test_read_arrow_filelike(geojson_filelike): assert meta["fields"].shape == (5,) assert len(table) == 3 + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. 
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_open_arrow_pyarrow(naturalearth_lowres): with open_arrow(naturalearth_lowres, use_pyarrow=True) as (meta, reader): @@ -968,6 +982,9 @@ def test_write_memory_driver_required(naturalearth_lowres): geometry_name=meta["geometry_name"] or "wkb_geometry", ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @requires_arrow_write_api @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"]) @@ -1074,6 +1091,9 @@ def test_write_open_file_handle(tmp_path, naturalearth_lowres): geometry_name=meta["geometry_name"] or "wkb_geometry", ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @requires_arrow_write_api def test_non_utf8_encoding_io_shapefile(tmp_path, encoded_text): diff --git a/pyogrio/tests/test_core.py b/pyogrio/tests/test_core.py index 1d593466..e0ff6d49 100644 --- a/pyogrio/tests/test_core.py +++ b/pyogrio/tests/test_core.py @@ -1,9 +1,12 @@ +from pathlib import Path + import numpy as np from numpy import allclose, array_equal from pyogrio import ( __gdal_geos_version__, __gdal_version__, + detect_write_driver, get_gdal_config_option, get_gdal_data_path, list_drivers, @@ -11,12 +14,15 @@ read_bounds, read_info, set_gdal_config_options, + vsi_listtree, + vsi_rmtree, + vsi_unlink, ) from pyogrio._compat import GDAL_GE_38 from pyogrio._env import GDALEnv -from pyogrio.core import detect_write_driver from pyogrio.errors import DataLayerError, DataSourceError -from pyogrio.tests.conftest import prepare_testfile, requires_shapely +from pyogrio.raw import read, write +from pyogrio.tests.conftest import START_FID, prepare_testfile, requires_shapely import pytest @@ -154,6 +160,7 @@ def test_list_drivers(): def test_list_layers( naturalearth_lowres, naturalearth_lowres_vsi, + naturalearth_lowres_vsimem, 
line_zm_file, curve_file, curve_polygon_file, @@ -168,6 +175,11 @@ def test_list_layers( list_layers(naturalearth_lowres_vsi[1]), [["naturalearth_lowres", "Polygon"]] ) + assert array_equal( + list_layers(naturalearth_lowres_vsimem), + [["naturalearth_lowres", "MultiPolygon"]], + ) + # Measured 3D is downgraded to plain 3D during read # Make sure this warning is raised with pytest.warns( @@ -207,22 +219,18 @@ def test_list_layers_filelike(geojson_filelike): assert layers[0, 0] == "test" -def test_read_bounds(naturalearth_lowres): - fids, bounds = read_bounds(naturalearth_lowres) - assert fids.shape == (177,) - assert bounds.shape == (4, 177) - - assert fids[0] == 0 - # Fiji; wraps antimeridian - assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088]) - +@pytest.mark.parametrize( + "testfile", + ["naturalearth_lowres", "naturalearth_lowres_vsimem", "naturalearth_lowres_vsi"], +) +def test_read_bounds(testfile, request): + path = request.getfixturevalue(testfile) + path = path if not isinstance(path, tuple) else path[1] -def test_read_bounds_vsi(naturalearth_lowres_vsi): - fids, bounds = read_bounds(naturalearth_lowres_vsi[1]) + fids, bounds = read_bounds(path) assert fids.shape == (177,) assert bounds.shape == (4, 177) - - assert fids[0] == 0 + assert fids[0] == START_FID[Path(path).suffix] # Fiji; wraps antimeridian assert allclose(bounds[:, 0], [-180.0, -18.28799, 180.0, -16.02088]) @@ -308,12 +316,9 @@ def test_read_bounds_bbox(naturalearth_lowres_all_ext): fids, bounds = read_bounds(naturalearth_lowres_all_ext, bbox=(-85, 8, -80, 10)) assert fids.shape == (2,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [34, 35]) # PAN, CRI - else: - # fid in other formats is 0-based - assert array_equal(fids, [33, 34]) # PAN, CRI + fids_expected = np.array([33, 34]) # PAN, CRI + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) assert bounds.shape == 
(4, 2) assert allclose( @@ -378,12 +383,8 @@ def test_read_bounds_mask(naturalearth_lowres_all_ext, mask, expected): fids = read_bounds(naturalearth_lowres_all_ext, mask=mask)[0] - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, np.array(expected) + 1) - else: - # fid in other formats is 0-based - assert array_equal(fids, expected) + fids_expected = np.array(expected) + START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) @pytest.mark.skipif( @@ -399,21 +400,15 @@ def test_read_bounds_bbox_intersects_vs_envelope_overlaps(naturalearth_lowres_al if __gdal_geos_version__ is None: # bboxes for CAN, RUS overlap but do not intersect geometries assert fids.shape == (4,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [4, 5, 19, 28]) # CAN, USA, RUS, MEX - else: - # fid in other formats is 0-based - assert array_equal(fids, [3, 4, 18, 27]) # CAN, USA, RUS, MEX + fids_expected = np.array([3, 4, 18, 27]) # CAN, USA, RUS, MEX + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) else: assert fids.shape == (2,) - if naturalearth_lowres_all_ext.suffix == ".gpkg": - # fid in gpkg is 1-based - assert array_equal(fids, [5, 28]) # USA, MEX - else: - # fid in other formats is 0-based - assert array_equal(fids, [4, 27]) # USA, MEX + fids_expected = np.array([4, 27]) # USA, MEX + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert array_equal(fids, fids_expected) @pytest.mark.parametrize("naturalearth_lowres", [".shp", ".gpkg"], indirect=True) @@ -453,8 +448,14 @@ def test_read_info(naturalearth_lowres): raise ValueError(f"test not implemented for ext {naturalearth_lowres.suffix}") -def test_read_info_vsi(naturalearth_lowres_vsi): - meta = read_info(naturalearth_lowres_vsi[1]) +@pytest.mark.parametrize( + "testfile", ["naturalearth_lowres_vsimem", 
"naturalearth_lowres_vsi"] +) +def test_read_info_vsi(testfile, request): + path = request.getfixturevalue(testfile) + path = path if not isinstance(path, tuple) else path[1] + + meta = read_info(path) assert meta["fields"].shape == (5,) assert meta["features"] == 177 @@ -611,3 +612,67 @@ def test_error_handling_warning(capfd, naturalearth_lowres): read_info(naturalearth_lowres, INVALID="YES") assert capfd.readouterr().err == "" + + +def test_vsimem_listtree_rmtree_unlink(naturalearth_lowres): + """Test all basic functionalities of file handling in /vsimem/.""" + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path("/vsimem/pyogrio_test_naturalearth_lowres.gpkg") + test_dir_path = Path(f"/vsimem/pyogrio_dir_test/{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + write(test_dir_path, geometry, field_data, **meta) + + # Check if everything was created properly with listtree + files = vsi_listtree("/vsimem/") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() in files + + # Check listtree with pattern + files = vsi_listtree("/vsimem/", pattern="pyogrio_dir_test*.gpkg") + assert test_file_path.as_posix() not in files + assert test_dir_path.as_posix() in files + + files = vsi_listtree("/vsimem/", pattern="pyogrio_test*.gpkg") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() not in files + + # Remove test_dir and its contents + vsi_rmtree(test_dir_path.parent) + files = vsi_listtree("/vsimem/") + assert test_file_path.as_posix() in files + assert test_dir_path.as_posix() not in files + + # Remove test_file + vsi_unlink(test_file_path) + + +def test_vsimem_rmtree_error(naturalearth_lowres_vsimem): + with pytest.raises(NotADirectoryError, match="Path is not a directory"): + vsi_rmtree(naturalearth_lowres_vsimem) + + with 
pytest.raises(FileNotFoundError, match="Path does not exist"): + vsi_rmtree("/vsimem/non-existent") + + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): + vsi_rmtree("/vsimem") + with pytest.raises( + OSError, match="path to in-memory file or directory is required" + ): + vsi_rmtree("/vsimem/") + + # Verify that naturalearth_lowres_vsimem still exists. + assert naturalearth_lowres_vsimem.as_posix() in vsi_listtree("/vsimem") + + +def test_vsimem_unlink_error(naturalearth_lowres_vsimem): + with pytest.raises(IsADirectoryError, match="Path is a directory"): + vsi_unlink(naturalearth_lowres_vsimem.parent) + + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsi_unlink("/vsimem/non-existent.gpkg") diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 74efa6f7..c70ac820 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -7,7 +7,14 @@ import numpy as np -from pyogrio import __gdal_version__, list_drivers, list_layers, read_info +from pyogrio import ( + __gdal_version__, + list_drivers, + list_layers, + read_info, + vsi_listtree, + vsi_unlink, +) from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe @@ -18,6 +25,7 @@ from pyogrio.tests.conftest import ( ALL_EXTS, DRIVERS, + START_FID, requires_arrow_write_api, requires_gdal_geos, requires_pyarrow_api, @@ -371,12 +379,9 @@ def test_read_fid_as_index(naturalearth_lowres_all_ext, use_arrow): fid_as_index=True, **kwargs, ) - if naturalearth_lowres_all_ext.suffix in [".gpkg"]: - # File format where fid starts at 1 - assert_index_equal(df.index, pd.Index([3, 4], name="fid")) - else: - # File format where fid starts at 0 - assert_index_equal(df.index, pd.Index([2, 3], name="fid")) + fids_expected = 
pd.Index([2, 3], name="fid") + fids_expected += START_FID[naturalearth_lowres_all_ext.suffix] + assert_index_equal(df.index, fids_expected) def test_read_fid_as_index_only(naturalearth_lowres, use_arrow): @@ -1568,6 +1573,22 @@ def test_write_read_null(tmp_path, use_arrow): assert result_gdf["object_str"][2] is None +@pytest.mark.requires_arrow_write_api +def test_write_read_vsimem(naturalearth_lowres_vsi, use_arrow): + path, _ = naturalearth_lowres_vsi + mem_path = f"/vsimem/{path.name}" + + input = read_dataframe(path, use_arrow=use_arrow) + assert len(input) == 177 + + try: + write_dataframe(input, mem_path, use_arrow=use_arrow) + result = read_dataframe(mem_path, use_arrow=use_arrow) + assert len(result) == 177 + finally: + vsi_unlink(mem_path) + + @pytest.mark.parametrize( "wkt,geom_types", [ @@ -1974,6 +1995,9 @@ def test_write_memory(naturalearth_lowres, driver): check_dtype=not is_json, ) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_memory_driver_required(naturalearth_lowres): df = read_dataframe(naturalearth_lowres) @@ -1986,6 +2010,9 @@ def test_write_memory_driver_required(naturalearth_lowres): ): write_dataframe(df.head(1), buffer, driver=None, layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("driver", ["ESRI Shapefile", "OpenFileGDB"]) def test_write_memory_unsupported_driver(naturalearth_lowres, driver): @@ -2001,6 +2028,9 @@ def test_write_memory_unsupported_driver(naturalearth_lowres, driver): ): write_dataframe(df, buffer, driver=driver, layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. 
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("driver", ["GeoJSON", "GPKG"]) def test_write_memory_append_unsupported(naturalearth_lowres, driver): @@ -2013,6 +2043,9 @@ def test_write_memory_append_unsupported(naturalearth_lowres, driver): ): write_dataframe(df.head(1), buffer, driver=driver, layer="test", append=True) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_memory_existing_unsupported(naturalearth_lowres): df = read_dataframe(naturalearth_lowres) @@ -2024,6 +2057,9 @@ def test_write_memory_existing_unsupported(naturalearth_lowres): ): write_dataframe(df.head(1), buffer, driver="GeoJSON", layer="test") + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. + assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + def test_write_open_file_handle(tmp_path, naturalearth_lowres): """Verify that writing to an open file handle is not currently supported""" @@ -2045,6 +2081,9 @@ def test_write_open_file_handle(tmp_path, naturalearth_lowres): with z.open("test.geojson", "w") as f: write_dataframe(df.head(1), f) + # Check temp file was cleaned up. Filter, as gdal keeps cache files in /vsimem/. 
+ assert vsi_listtree("/vsimem/", pattern="pyogrio_*") == [] + @pytest.mark.parametrize("ext", ["gpkg", "geojson"]) def test_non_utf8_encoding_io(tmp_path, ext, encoded_text): diff --git a/pyogrio/tests/test_path.py b/pyogrio/tests/test_path.py index 6a3010e7..9cc7943c 100644 --- a/pyogrio/tests/test_path.py +++ b/pyogrio/tests/test_path.py @@ -33,6 +33,7 @@ def change_cwd(path): [ # local file paths that should be passed through as is ("data.gpkg", "data.gpkg"), + (Path("data.gpkg"), "data.gpkg"), ("/home/user/data.gpkg", "/home/user/data.gpkg"), (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"), ("file:///home/user/data.gpkg", "/home/user/data.gpkg"), @@ -85,6 +86,8 @@ def change_cwd(path): "s3://testing/test.zip!a/b/item.shp", "/vsizip/vsis3/testing/test.zip/a/b/item.shp", ), + ("/vsimem/data.gpkg", "/vsimem/data.gpkg"), + (Path("/vsimem/data.gpkg"), "/vsimem/data.gpkg"), ], ) def test_vsi_path(path, expected): @@ -339,19 +342,23 @@ def test_uri_s3_dataframe(aws_env_setup): assert len(df) == 67 -def test_get_vsi_path_or_buffer_obj_to_string(): - path = Path("/tmp/test.gpkg") - assert get_vsi_path_or_buffer(path) == str(path) +@pytest.mark.parametrize( + "path, expected", + [ + (Path("/tmp/test.gpkg"), str(Path("/tmp/test.gpkg"))), + (Path("/vsimem/test.gpkg"), "/vsimem/test.gpkg"), + ], +) +def test_get_vsi_path_or_buffer_obj_to_string(path, expected): + """Verify that get_vsi_path_or_buffer retains forward slashes in /vsimem paths. + + The /vsimem paths should keep forward slashes for GDAL to recognize them as such. + However, on Windows systems, forward slashes are by default replaced by backslashes, + so this test verifies that this doesn't happen for /vsimem paths. 
+ """ + assert get_vsi_path_or_buffer(path) == expected def test_get_vsi_path_or_buffer_fixtures_to_string(tmp_path): path = tmp_path / "test.gpkg" assert get_vsi_path_or_buffer(path) == str(path) - - -@pytest.mark.parametrize( - "raw_path", ["/vsimem/test.shp.zip", "/vsizip//vsimem/test.shp.zip"] -) -def test_vsimem_path_exception(raw_path): - with pytest.raises(ValueError, match=""): - vsi_path(raw_path) diff --git a/pyogrio/tests/test_util.py b/pyogrio/tests/test_util.py new file mode 100644 index 00000000..52ef2a83 --- /dev/null +++ b/pyogrio/tests/test_util.py @@ -0,0 +1,56 @@ +from pathlib import Path + +from pyogrio import vsi_listtree, vsi_unlink +from pyogrio.raw import read, write +from pyogrio.util import vsimem_rmtree_toplevel + +import pytest + + +def test_vsimem_rmtree_toplevel(naturalearth_lowres): + # Prepare test data in /vsimem/ + meta, _, geometry, field_data = read(naturalearth_lowres) + meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_dir_path = Path(f"/vsimem/test/{naturalearth_lowres.stem}.gpkg") + test_dir2_path = Path(f"/vsimem/test2/test2/{naturalearth_lowres.stem}.gpkg") + + write(test_dir_path, geometry, field_data, **meta) + write(test_dir2_path, geometry, field_data, **meta) + + # Check if everything was created properly with listtree + files = vsi_listtree("/vsimem/") + assert test_dir_path.as_posix() in files + assert test_dir2_path.as_posix() in files + + # Test deleting parent dir of file in single directory + vsimem_rmtree_toplevel(test_dir_path) + files = vsi_listtree("/vsimem/") + assert test_dir_path.parent.as_posix() not in files + assert test_dir2_path.as_posix() in files + + # Test deleting top-level dir of file in a subdirectory + vsimem_rmtree_toplevel(test_dir2_path) + assert test_dir2_path.as_posix() not in vsi_listtree("/vsimem/") + + +def test_vsimem_rmtree_toplevel_error(naturalearth_lowres): + # Prepare test data in /vsimem + meta, _, geometry, field_data = read(naturalearth_lowres) + 
meta["spatial_index"] = False + meta["geometry_type"] = "MultiPolygon" + test_file_path = Path(f"/vsimem/pyogrio_test_{naturalearth_lowres.stem}.gpkg") + + write(test_file_path, geometry, field_data, **meta) + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Deleting parent dir of non-existent file should raise an error. + with pytest.raises(FileNotFoundError, match="Path does not exist"): + vsimem_rmtree_toplevel("/vsimem/test/non-existent.gpkg") + + # File should still be there + assert test_file_path.as_posix() in vsi_listtree("/vsimem/") + + # Cleanup; compare the posix string, as in the membership checks above. + vsi_unlink(test_file_path) + assert test_file_path.as_posix() not in vsi_listtree("/vsimem/") diff --git a/pyogrio/util.py b/pyogrio/util.py index d0e76446..b018ad79 100644 --- a/pyogrio/util.py +++ b/pyogrio/util.py @@ -4,11 +4,14 @@ import sys from packaging.version import Version from pathlib import Path +from typing import Union from urllib.parse import urlparse +from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel + def get_vsi_path_or_buffer(path_or_buffer): - """Get vsi-prefixed path or bytes buffer depending on type of path_or_buffer. + """Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer. If path_or_buffer is a bytes object, it will be returned directly and will be read into an in-memory dataset when passed to one of the Cython functions. @@ -29,9 +32,10 @@ def get_vsi_path_or_buffer(path_or_buffer): str or bytes """ - # force path objects to string to specifically ignore their read method + # treat Path objects here already to ignore their read method + to avoid backslashes + # on Windows. 
if isinstance(path_or_buffer, Path): - return vsi_path(str(path_or_buffer)) + return vsi_path(path_or_buffer) if isinstance(path_or_buffer, bytes): return path_or_buffer @@ -48,13 +52,14 @@ def get_vsi_path_or_buffer(path_or_buffer): return vsi_path(str(path_or_buffer)) -def vsi_path(path: str) -> str: - """Ensure path is a local path or a GDAL-compatible vsi path.""" - if "/vsimem/" in path: - raise ValueError( - "path cannot contain /vsimem/ directly; to use an in-memory dataset a " - "bytes object must be passed instead" - ) +def vsi_path(path: Union[str, Path]) -> str: + """Ensure path is a local path or a GDAL-compatible VSI path.""" + # Convert Path objects to string, but for VSI paths, keep posix style path. + if isinstance(path, Path): + if sys.platform == "win32" and path.as_posix().startswith("/vsi"): + path = path.as_posix() + else: + path = str(path) # path is already in GDAL format if path.startswith("/vsi"): @@ -217,3 +222,26 @@ def _mask_to_wkb(mask): raise ValueError("'mask' parameter must be a Shapely geometry") return shapely.to_wkb(mask) + + +def vsimem_rmtree_toplevel(path: Union[str, Path]): + """Remove the parent directory of the file path recursively. + + This is used for final cleanup of an in-memory dataset, which may have been + created within a directory to contain sibling files. + + Additional VSI handlers may be chained to the left of /vsimem/ in path and + will be ignored. + + Remark: function is defined here to be able to run tests on it. 
+ + Parameters + ---------- + path : str or pathlib.Path + path to in-memory file + + """ + if isinstance(path, Path): + path = path.as_posix() + + _vsimem_rmtree_toplevel(path) From 0a83d43664852e884107be7952f02a3bd90db958 Mon Sep 17 00:00:00 2001 From: Brendan Ward Date: Fri, 27 Sep 2024 11:40:04 -0700 Subject: [PATCH 09/27] DOC: Add docs for working with in-memory datasets (#477) --- docs/source/introduction.md | 67 +++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/docs/source/introduction.md b/docs/source/introduction.md index c62357e1..b24862d4 100644 --- a/docs/source/introduction.md +++ b/docs/source/introduction.md @@ -531,6 +531,73 @@ You can also use a dictionary to specify either `dataset_options` or >>> write_dataframe(df, '/tmp/test.gpkg', dataset_options={"version": "1.0"}, layer_options={"geometry_name": "the_geom"}) ``` +## Reading from and writing to in-memory datasets + +It is possible to read from a dataset stored as bytes: + +```python +from io import BytesIO + +# save a GeoJSON to bytes +geojson = """{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { }, + "geometry": { "type": "Point", "coordinates": [1, 1] } + } + ] +}""" + +geojson_bytes = BytesIO(geojson.encode("UTF-8")) + +df = read_dataframe(geojson_bytes) +``` + +Note: this may emit a `RuntimeWarning` where the in-memory dataset is detected +to be a particular format but lacks the expected file extension (e.g., `.gpkg`) +because the in-memory path automatically created by pyogrio does not include the +extension. 
+ +It is also possible to write a dataset to bytes, but the driver must also be +specified, and the layer name should be specified to avoid it being set to a +random character string: + +```python +buffer = BytesIO() + +write_dataframe(df, buffer, layer="my_layer", driver="GPKG") + +out_bytes = buffer.getvalue() +``` + +Note: this is limited to single-file data formats (e.g., GPKG) and does not +support formats with multiple files (e.g., ESRI Shapefile). + +It is also possible to use a `/vsimem/` in-memory dataset with other GDAL-based +packages that support the `/vsimem/` interface, such as the `gdal` package: + +```python +from osgeo import gdal + +write_dataframe(df, "/vsimem/test.gpkg", layer="my_layer", driver="GPKG") + +# perform some operation using it +gdal.Rasterize("test.tif", "/vsimem/test.gpkg", outputType=gdal.GDT_Byte, noData=255, initValues=255, xRes=0.1, yRes=-0.1, allTouched=True, burnValues=1) + +# release the memory using pyogrio +from pyogrio import vsi_unlink + +vsi_unlink("/vsimem/test.gpkg") +``` + +Pyogrio can also read from a valid `/vsimem/` file created using a different +package. + +It is the user's responsibility to clean up the in-memory filesystem; pyogrio +will not automatically release those resources. 
+ ## Configuration options It is possible to set From bf0228495ecc71b4477550d51f1e863c336a961e Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Sat, 28 Sep 2024 17:15:34 +0200 Subject: [PATCH 10/27] TST: skip multisurface test with arrow to avoid crash (#479) --- pyogrio/tests/test_geopandas_io.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index c70ac820..0675c197 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1765,6 +1765,10 @@ def test_read_invalid_poly_ring(tmp_path, use_arrow, on_invalid, message): def test_read_multisurface(multisurface_file, use_arrow): if use_arrow: + # TODO: revisit once https://github.com/geopandas/pyogrio/issues/478 + # is resolved. + pytest.skip("Shapely + GEOS 3.13 crashes in from_wkb for this case") + with pytest.raises(shapely.errors.GEOSException): # TODO(Arrow) # shapely fails parsing the WKB From 5253bdda66d37c93d019a919f3f2b229ae562c13 Mon Sep 17 00:00:00 2001 From: Brendan Ward Date: Sat, 28 Sep 2024 11:19:44 -0700 Subject: [PATCH 11/27] CI: Try to resolve mamba install issue on windows when using with pip (#480) --- ci/envs/libgdal3.5.1.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/ci/envs/libgdal3.5.1.yml b/ci/envs/libgdal3.5.1.yml index b0a2c45c..a37aa2f1 100644 --- a/ci/envs/libgdal3.5.1.yml +++ b/ci/envs/libgdal3.5.1.yml @@ -5,8 +5,3 @@ dependencies: - numpy - libgdal==3.5.1 - pytest - - geopandas-base - - pip - - pip: - # install Shapely >= 2.0 using pip because it is not available on conda-forge for above libgdal - - shapely>=2 From eb8e7889224155ffa0f779360db29f07f370eef1 Mon Sep 17 00:00:00 2001 From: "Brendan C. 
Ward" Date: Sat, 28 Sep 2024 11:22:24 -0700 Subject: [PATCH 12/27] RLS: v0.10.0 --- CHANGES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 66c15f07..ac51e8c7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,10 +1,10 @@ # CHANGELOG -## 0.10.0 (yyyy-mm-dd) +## 0.10.0 (2024-09-28) ### Improvements -- Add support to read, write, list, and remove `/vsimem/` files (#457) +- Add support to read, write, list, and remove `/vsimem/` files (#457). ### Bug fixes From 247bf6f2187afae480770a0f0c7514e0159eac72 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 1 Oct 2024 23:13:11 +0200 Subject: [PATCH 13/27] DOC: update pyogrio introduction (#481) --- README.md | 54 ++++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 8b9f8868..9e5e9708 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,29 @@ -# pyogrio - Vectorized spatial vector file format I/O using GDAL/OGR - -Pyogrio provides a -[GeoPandas](https://github.com/geopandas/geopandas)-oriented API to OGR vector -data sources, such as ESRI Shapefile, GeoPackage, and GeoJSON. Vector data sources -have geometries, such as points, lines, or polygons, and associated records -with potentially many columns worth of data. - -Pyogrio uses a vectorized approach for reading and writing GeoDataFrames to and -from OGR vector data sources in order to give you faster interoperability. It -uses pre-compiled bindings for GDAL/OGR so that the performance is primarily -limited by the underlying I/O speed of data source drivers in GDAL/OGR rather -than multiple steps of converting to and from Python data types within Python. 
+# pyogrio - bulk-oriented spatial vector file I/O using GDAL/OGR + +Pyogrio provides fast, bulk-oriented read and write access to +[GDAL/OGR](https://gdal.org/en/latest/drivers/vector/index.html) vector data +sources, such as ESRI Shapefile, GeoPackage, GeoJSON, and several others. +Vector data sources typically have geometries, such as points, lines, or +polygons, and associated records with potentially many columns worth of data. + +The typical use is to read or write these data sources to/from +[GeoPandas](https://github.com/geopandas/geopandas) `GeoDataFrames`. Because +the geometry column is optional, reading or writing only non-spatial data is +also possible. Hence, GeoPackage attribute tables, DBF files, or CSV files are +also supported. + +Pyogrio is fast because it uses pre-compiled bindings for GDAL/OGR to read and +write the data records in bulk. This approach avoids multiple steps of +converting to and from Python data types within Python, so performance becomes +primarily limited by the underlying I/O speed of data source drivers in +GDAL/OGR. We have seen \>5-10x speedups reading files and \>5-20x speedups writing files -compared to using non-vectorized approaches (Fiona and current I/O support in -GeoPandas). - -You can read these data sources into -`GeoDataFrames`, read just the non-geometry columns into Pandas `DataFrames`, -or even read non-spatial data sources that exist alongside vector data sources, -such as tables in a ESRI File Geodatabase, or antiquated DBF files. - -Pyogrio also enables you to write `GeoDataFrames` to at least a few different -OGR vector data source formats. +compared to using row-per-row approaches (e.g. Fiona). Read the documentation for more information: [https://pyogrio.readthedocs.io](https://pyogrio.readthedocs.io/en/latest/). -WARNING: Pyogrio is still at an early version and the API is subject to -substantial change. Please see [CHANGES](CHANGES.md). 
- ## Requirements Supports Python 3.9 - 3.13 and GDAL 3.4.x - 3.9.x. @@ -52,9 +46,9 @@ for more information. ## Supported vector formats -Pyogrio supports some of the most common vector data source formats (provided -they are also supported by GDAL/OGR), including ESRI Shapefile, GeoPackage, -GeoJSON, and FlatGeobuf. +Pyogrio supports most common vector data source formats (provided they are also +supported by GDAL/OGR), including ESRI Shapefile, GeoPackage, GeoJSON, and +FlatGeobuf. Please see the [list of supported formats](https://pyogrio.readthedocs.io/en/latest/supported_formats.html) for more information. @@ -64,7 +58,7 @@ for more information. Please read the [introduction](https://pyogrio.readthedocs.io/en/latest/supported_formats.html) for more information and examples to get started using Pyogrio. -You can also check out the the [API documentation](https://pyogrio.readthedocs.io/en/latest/api.html) +You can also check out the [API documentation](https://pyogrio.readthedocs.io/en/latest/api.html) for full details on using the API. ## Credits From d4216e9119479c9ff9322435e9947247916bebf6 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Tue, 1 Oct 2024 23:27:13 +0200 Subject: [PATCH 14/27] DOC: sync readme changes to docs (#482) --- docs/source/about.md | 2 +- docs/source/index.md | 47 +++++++++++++++++++------------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/docs/source/about.md b/docs/source/about.md index 8c71b05e..935f9240 100644 --- a/docs/source/about.md +++ b/docs/source/about.md @@ -22,7 +22,7 @@ for working with OGR vector data sources. It is **awesome**, has highly-dedicate maintainers and contributors, and exposes more functionality than Pyogrio ever will. This project would not be possible without Fiona having come first. 
-Pyogrio uses a vectorized (array-oriented) approach for reading and writing +Pyogrio uses a bulk-oriented approach for reading and writing spatial vector file formats, which enables faster I/O operations. It borrows from the internal mechanics and lessons learned of Fiona. It uses a stateless approach to reading or writing data; all data are read or written in a single diff --git a/docs/source/index.md b/docs/source/index.md index 02a81af2..bc008d6f 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,32 +1,25 @@ -# pyogrio - Vectorized spatial vector file format I/O using GDAL/OGR - -Pyogrio provides a -[GeoPandas](https://github.com/geopandas/geopandas)-oriented API to OGR vector -data sources, such as ESRI Shapefile, GeoPackage, and GeoJSON. Vector data sources -have geometries, such as points, lines, or polygons, and associated records -with potentially many columns worth of data. - -Pyogrio uses a vectorized approach for reading and writing GeoDataFrames to and -from OGR vector data sources in order to give you faster interoperability. It -uses pre-compiled bindings for GDAL/OGR so that the performance is primarily -limited by the underlying I/O speed of data source drivers in GDAL/OGR rather -than multiple steps of converting to and from Python data types within Python. +# pyogrio - bulk-oriented spatial vector file I/O using GDAL/OGR + +Pyogrio provides fast, bulk-oriented read and write access to +[GDAL/OGR](https://gdal.org/en/latest/drivers/vector/index.html) vector data +sources, such as ESRI Shapefile, GeoPackage, GeoJSON, and several others. +Vector data sources typically have geometries, such as points, lines, or +polygons, and associated records with potentially many columns worth of data. + +The typical use is to read or write these data sources to/from +[GeoPandas](https://github.com/geopandas/geopandas) `GeoDataFrames`. Because +the geometry column is optional, reading or writing only non-spatial data is +also possible. 
Hence, GeoPackage attribute tables, DBF files, or CSV files are +also supported. + +Pyogrio is fast because it uses pre-compiled bindings for GDAL/OGR to read and +write the data records in bulk. This approach avoids multiple steps of +converting to and from Python data types within Python, so performance becomes +primarily limited by the underlying I/O speed of data source drivers in +GDAL/OGR. We have seen \>5-10x speedups reading files and \>5-20x speedups writing files -compared to using non-vectorized approaches (Fiona and current I/O support in -GeoPandas). - -You can read these data sources into -`GeoDataFrames`, read just the non-geometry columns into Pandas `DataFrames`, -or even read non-spatial data sources that exist alongside vector data sources, -such as tables in a ESRI File Geodatabase, or antiquated DBF files. - -Pyogrio also enables you to write `GeoDataFrames` to at least a few different -OGR vector data source formats. - -```{warning} -Pyogrio is still at an early version and the API is subject to substantial change. -``` +compared to using row-per-row approaches (e.g. Fiona). ```{toctree} --- From ffe53c68b1154e714682a5b2ce74ca5be80cb31f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 13:16:37 +0200 Subject: [PATCH 15/27] Bump mamba-org/setup-micromamba from 1 to 2 (#483) Bumps [mamba-org/setup-micromamba](https://github.com/mamba-org/setup-micromamba) from 1 to 2. - [Release notes](https://github.com/mamba-org/setup-micromamba/releases) - [Commits](https://github.com/mamba-org/setup-micromamba/compare/v1...v2) --- updated-dependencies: - dependency-name: mamba-org/setup-micromamba dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/tests-conda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests-conda.yml b/.github/workflows/tests-conda.yml index 8fb37da5..4f13e44b 100644 --- a/.github/workflows/tests-conda.yml +++ b/.github/workflows/tests-conda.yml @@ -50,7 +50,7 @@ jobs: uses: actions/checkout@v4 - name: Install Conda environment with Micromamba - uses: mamba-org/setup-micromamba@v1 + uses: mamba-org/setup-micromamba@v2 with: environment-file: ci/envs/${{ matrix.env }}.yml create-args: >- From ab3486e93719d73b8b17d32f5240dcf4c3bb56c7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Oct 2024 13:16:50 +0200 Subject: [PATCH 16/27] Bump pypa/cibuildwheel from 2.21.1 to 2.21.2 (#484) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.21.1 to 2.21.2. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.21.1...v2.21.2) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f1ab3dec..ebd131c2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -137,7 +137,7 @@ jobs: BUILDKIT_PROGRESS: plain - name: Build wheels - uses: pypa/cibuildwheel@v2.21.1 + uses: pypa/cibuildwheel@v2.21.2 - uses: actions/upload-artifact@v4 with: @@ -220,7 +220,7 @@ jobs: path: ${{ matrix.vcpkg_logs }} - name: Build wheels - uses: pypa/cibuildwheel@v2.21.1 + uses: pypa/cibuildwheel@v2.21.2 env: # CIBW needs to know triplet for the correct install path VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} From fc555f0f74a913d32a8cfa6eafac85d06ce7c849 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Mon, 21 Oct 2024 19:15:24 +0200 Subject: [PATCH 17/27] TST: test_write_kml_file_coordinate_order(): make it compatible with GDAL 3.10 (#489) --- pyogrio/tests/test_geopandas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 0675c197..112eef84 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -2257,7 +2257,7 @@ def test_write_kml_file_coordinate_order(tmp_path, use_arrow): if "LIBKML" in list_drivers(): # test appending to the existing file only if LIBKML is available # as it appears to fall back on LIBKML driver when appending. 
- points_append = [Point(70, 80), Point(90, 100), Point(110, 120)] + points_append = [Point(7, 8), Point(9, 10), Point(11, 12)] gdf_append = gp.GeoDataFrame(geometry=points_append, crs="EPSG:4326") write_dataframe( From ce66d6c817494b192f30245878aaf98dfb1ea541 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Nov 2024 20:44:49 +0100 Subject: [PATCH 18/27] CI: update for uv 0.5 (#496) --- .github/workflows/docker-gdal.yml | 2 +- .github/workflows/release.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-gdal.yml b/.github/workflows/docker-gdal.yml index 19fe609e..8cb19547 100644 --- a/.github/workflows/docker-gdal.yml +++ b/.github/workflows/docker-gdal.yml @@ -53,7 +53,7 @@ jobs: # used for tests below run: | curl -LsSf https://astral.sh/uv/install.sh | sh - . $HOME/.cargo/env + . $HOME/.local/bin/env uv venv .venv echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV echo "$PWD/.venv/bin" >> $GITHUB_PATH diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ebd131c2..322c3bad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,7 +67,7 @@ jobs: # used for tests below run: | curl -LsSf https://astral.sh/uv/install.sh | sh - . $HOME/.cargo/env + . $HOME/.local/bin/env uv venv .venv echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV echo "$PWD/.venv/bin" >> $GITHUB_PATH @@ -280,7 +280,7 @@ jobs: shell: bash run: | curl -LsSf https://astral.sh/uv/install.sh | sh - . $HOME/.cargo/env + . 
$HOME/.local/bin/env uv venv .venv echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV echo "$PWD/.venv/bin" >> $GITHUB_PATH From bbde6bb65f4b7ac9e8156ba7fe062899afab741a Mon Sep 17 00:00:00 2001 From: Brendan Ward Date: Sat, 30 Nov 2024 00:50:56 -0800 Subject: [PATCH 19/27] MNT: Upgrade to GDAL 3.10.0 (#499) Co-authored-by: Joris Van den Bossche --- .circleci/config.yml | 2 +- .github/workflows/docker-gdal.yml | 5 ++-- .github/workflows/release.yml | 23 +++++++++---------- .github/workflows/tests-conda.yml | 2 +- CHANGES.md | 6 +++++ ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile | 2 +- ...nylinux_2_28_aarch64-vcpkg-gdal.Dockerfile | 2 +- ...anylinux_2_28_x86_64-vcpkg-gdal.Dockerfile | 2 +- ci/vcpkg.json | 4 ++-- pyproject.toml | 8 +++---- setup.py | 2 +- 11 files changed, 32 insertions(+), 26 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 498fdf06..d8bb2719 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,7 +20,7 @@ jobs: - run: name: Build the Linux aarch64 wheels. 
command: | - python3 -m pip install --user cibuildwheel==2.21.0 + python3 -m pip install --user cibuildwheel==2.22.0 python3 -m cibuildwheel --output-dir wheelhouse - run: name: Test the wheels diff --git a/.github/workflows/docker-gdal.yml b/.github/workflows/docker-gdal.yml index 8cb19547..6934b721 100644 --- a/.github/workflows/docker-gdal.yml +++ b/.github/workflows/docker-gdal.yml @@ -21,6 +21,7 @@ jobs: matrix: container: - "ghcr.io/osgeo/gdal:ubuntu-small-latest" # >= python 3.12.3 + - "ghcr.io/osgeo/gdal:ubuntu-small-3.10.0" # python 3.12.3 - "ghcr.io/osgeo/gdal:ubuntu-small-3.9.2" # python 3.12.3 - "ghcr.io/osgeo/gdal:ubuntu-small-3.8.5" # python 3.10.12 - "ghcr.io/osgeo/gdal:ubuntu-small-3.7.3" # python 3.10.12 @@ -36,9 +37,9 @@ jobs: run: | apt-get update && apt-get install -y build-essential git python3-dev - - name: Install Python + - name: Install Python # the GDAL 3.4 and 3.5 images do have Python 3.8 installed, so have to - # install a more recent Python version manually + # install a more recent Python version manually if: matrix.container == 'osgeo/gdal:ubuntu-small-3.5.3' || matrix.container == 'osgeo/gdal:ubuntu-small-3.4.3' run: | apt-get update && apt-get install -y software-properties-common diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 322c3bad..16c554bc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -54,7 +54,7 @@ jobs: needs: [build-sdist] runs-on: ubuntu-latest container: - image: "ghcr.io/osgeo/gdal:ubuntu-small-3.9.2" + image: "ghcr.io/osgeo/gdal:ubuntu-small-3.10.0" steps: - name: Install packages @@ -137,7 +137,7 @@ jobs: BUILDKIT_PROGRESS: plain - name: Build wheels - uses: pypa/cibuildwheel@v2.21.2 + uses: pypa/cibuildwheel@v2.22.0 - uses: actions/upload-artifact@v4 with: @@ -184,7 +184,7 @@ jobs: path: | ${{ matrix.vcpkg_cache }} # bump the last digit to avoid using previous build cache - key: ${{ matrix.os }}-${{ matrix.arch }}-vcpkg-gdal3.9.2-cache0 + key: ${{ 
matrix.os }}-${{ matrix.arch }}-vcpkg-gdal3.10.0-cache0 # MacOS build requires aclocal, which is part of automake, but appears # to be missing in default image @@ -202,7 +202,7 @@ jobs: git reset --hard # pull specific commit with desired GDAL version git pull - git checkout 73794ce5f63fd138fab999a22959ca7c6305d93c + git checkout 0857a4b08c14030bbe41e80accb2b1fddb047a74 - name: Install GDAL env: @@ -220,7 +220,7 @@ jobs: path: ${{ matrix.vcpkg_logs }} - name: Build wheels - uses: pypa/cibuildwheel@v2.21.2 + uses: pypa/cibuildwheel@v2.20.0 env: # CIBW needs to know triplet for the correct install path VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} @@ -272,15 +272,15 @@ jobs: python-version: ${{ matrix.python-version }} allow-prereleases: true + - name: Install uv + uses: astral-sh/setup-uv@v4 + - name: Create virtual environment (Linux / MacOS) - # install uv and use it to create a virtual environment, then add it to - # environment variables so that it is automatically activated and can be - # used for tests below + # use uv to create a virtual environment, then add it to environment + # variables so that it is automatically activated and can be used for + # tests below if: ${{ runner.os != 'Windows' }} - shell: bash run: | - curl -LsSf https://astral.sh/uv/install.sh | sh - . 
$HOME/.local/bin/env uv venv .venv echo "VIRTUAL_ENV=.venv" >> $GITHUB_ENV echo "$PWD/.venv/bin" >> $GITHUB_PATH @@ -288,7 +288,6 @@ jobs: - name: Create virtual environment (Windows) if: ${{ runner.os == 'Windows' }} run: | - irm https://astral.sh/uv/install.ps1 | iex uv venv .venv "VIRTUAL_ENV=.venv" | Out-File -FilePath $env:GITHUB_ENV -Append "$PWD/.venv/Scripts" | Out-File -FilePath $env:GITHUB_PATH -Append diff --git a/.github/workflows/tests-conda.yml b/.github/workflows/tests-conda.yml index 4f13e44b..48e81a97 100644 --- a/.github/workflows/tests-conda.yml +++ b/.github/workflows/tests-conda.yml @@ -62,7 +62,7 @@ jobs: run: | echo "GDAL_INCLUDE_PATH=$MAMBA_ROOT_PREFIX/envs/test/Library/include." >> $GITHUB_ENV echo "GDAL_LIBRARY_PATH=$MAMBA_ROOT_PREFIX/envs/test/Library/lib" >> $GITHUB_ENV - echo "GDAL_VERSION=$(gdalinfo --version | cut -c 6-10)" >> $GITHUB_ENV + echo "GDAL_VERSION=$(gdalinfo --version | awk '{print $2}' | tr -d ",")" >> $GITHUB_ENV - name: Install pyogrio run: pip install -e . diff --git a/CHANGES.md b/CHANGES.md index ac51e8c7..22d5fcd9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,11 @@ # CHANGELOG +## 0.11.0 (TBD) + +### Packaging + +- the GDAL library included in the wheels is upgraded from 3.9.2 to 3.10.0 (#499). 
+ ## 0.10.0 (2024-09-28) ### Improvements diff --git a/ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile b/ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile index bb1a7843..1dd50367 100644 --- a/ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile +++ b/ci/manylinux2014_x86_64-vcpkg-gdal.Dockerfile @@ -7,7 +7,7 @@ RUN yum install -y curl unzip zip tar perl-IPC-Cmd RUN ln -s /opt/python/cp38-cp38/bin/python3 /usr/bin/python3 RUN git clone https://github.com/Microsoft/vcpkg.git /opt/vcpkg && \ - git -C /opt/vcpkg checkout 73794ce5f63fd138fab999a22959ca7c6305d93c + git -C /opt/vcpkg checkout 0857a4b08c14030bbe41e80accb2b1fddb047a74 ENV VCPKG_INSTALLATION_ROOT="/opt/vcpkg" ENV PATH="${PATH}:/opt/vcpkg" diff --git a/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile b/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile index 5a713eaf..4851ca40 100644 --- a/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile +++ b/ci/manylinux_2_28_aarch64-vcpkg-gdal.Dockerfile @@ -4,7 +4,7 @@ FROM quay.io/pypa/manylinux_2_28_aarch64:2024-08-12-7fde9b1 RUN dnf -y install curl zip unzip tar ninja-build perl-IPC-Cmd RUN git clone https://github.com/Microsoft/vcpkg.git /opt/vcpkg && \ - git -C /opt/vcpkg checkout 73794ce5f63fd138fab999a22959ca7c6305d93c + git -C /opt/vcpkg checkout 0857a4b08c14030bbe41e80accb2b1fddb047a74 ENV VCPKG_INSTALLATION_ROOT="/opt/vcpkg" ENV PATH="${PATH}:/opt/vcpkg" diff --git a/ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile b/ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile index 6a6ef05c..29ef445c 100644 --- a/ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile +++ b/ci/manylinux_2_28_x86_64-vcpkg-gdal.Dockerfile @@ -4,7 +4,7 @@ FROM quay.io/pypa/manylinux_2_28_x86_64:2024-08-12-7fde9b1 RUN dnf -y install curl zip unzip tar ninja-build perl-IPC-Cmd RUN git clone https://github.com/Microsoft/vcpkg.git /opt/vcpkg && \ - git -C /opt/vcpkg checkout 73794ce5f63fd138fab999a22959ca7c6305d93c + git -C /opt/vcpkg checkout 0857a4b08c14030bbe41e80accb2b1fddb047a74 ENV VCPKG_INSTALLATION_ROOT="/opt/vcpkg" 
ENV PATH="${PATH}:/opt/vcpkg" diff --git a/ci/vcpkg.json b/ci/vcpkg.json index ea331f18..0919a80f 100644 --- a/ci/vcpkg.json +++ b/ci/vcpkg.json @@ -1,6 +1,6 @@ { "name": "pyogrio", - "version": "0.10.0", + "version": "0.11.0", "dependencies": [ { "name": "gdal", @@ -8,5 +8,5 @@ "features": ["recommended-features", "curl", "geos", "iconv"] } ], - "builtin-baseline": "73794ce5f63fd138fab999a22959ca7c6305d93c" + "builtin-baseline": "0857a4b08c14030bbe41e80accb2b1fddb047a74" } diff --git a/pyproject.toml b/pyproject.toml index 26617a79..ed0471bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ name = "pyogrio" dynamic = ["version"] authors = [ { name = "Brendan C. Ward", email = "bcward@astutespruce.com" }, - { name = "pyogrio contributors" } + { name = "pyogrio contributors" }, ] maintainers = [{ name = "pyogrio contributors" }] license = { file = "LICENSE" } @@ -51,7 +51,7 @@ build-verbosity = 3 VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/$VCPKG_DEFAULT_TRIPLET" GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" -GDAL_VERSION = "3.9.2" +GDAL_VERSION = "3.10.0" PYOGRIO_PACKAGE_DATA = 1 GDAL_DATA = "$VCPKG_INSTALL/share/gdal" PROJ_LIB = "$VCPKG_INSTALL/share/proj" @@ -66,7 +66,7 @@ repair-wheel-command = [ VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/$VCPKG_DEFAULT_TRIPLET" GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" -GDAL_VERSION = "3.9.2" +GDAL_VERSION = "3.10.0" PYOGRIO_PACKAGE_DATA = 1 GDAL_DATA = "$VCPKG_INSTALL/share/gdal" PROJ_LIB = "$VCPKG_INSTALL/share/proj" @@ -80,7 +80,7 @@ repair-wheel-command = "delvewheel repair --add-path C:/vcpkg/installed/x64-wind VCPKG_INSTALL = "$VCPKG_INSTALLATION_ROOT/installed/x64-windows-dynamic-release" GDAL_INCLUDE_PATH = "$VCPKG_INSTALL/include" GDAL_LIBRARY_PATH = "$VCPKG_INSTALL/lib" -GDAL_VERSION = "3.9.2" +GDAL_VERSION = "3.10.0" PYOGRIO_PACKAGE_DATA = 1 GDAL_DATA = "$VCPKG_INSTALL/share/gdal" PROJ_LIB = 
"$VCPKG_INSTALL/share/proj" diff --git a/setup.py b/setup.py index 4acb7677..10c4a03d 100644 --- a/setup.py +++ b/setup.py @@ -205,7 +205,7 @@ def get_gdal_config(): version=version, packages=find_packages(), include_package_data=True, - exclude_package_data={'': ['*.h', '_*.pxd', '_*.pyx']}, + exclude_package_data={"": ["*.h", "_*.pxd", "_*.pyx"]}, cmdclass=cmdclass, ext_modules=ext_modules, package_data=package_data, From 8c32f2d1b719c887cb4b8c2368487c9a919d5bd9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 30 Nov 2024 12:48:57 +0100 Subject: [PATCH 20/27] Bump pypa/cibuildwheel from 2.21.2 to 2.22.0 (#498) --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 16c554bc..1e367d66 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -220,7 +220,7 @@ jobs: path: ${{ matrix.vcpkg_logs }} - name: Build wheels - uses: pypa/cibuildwheel@v2.20.0 + uses: pypa/cibuildwheel@v2.22.0 env: # CIBW needs to know triplet for the correct install path VCPKG_DEFAULT_TRIPLET: ${{ matrix.triplet }} From 57d9346fd57a7aa78f8df901e3eb2ec1810e5973 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Thu, 5 Dec 2024 18:32:49 -0500 Subject: [PATCH 21/27] Fix WKB writing on big-endian systems (#497) --- CHANGES.md | 4 ++++ pyogrio/_io.pyx | 14 +++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 22d5fcd9..131f3359 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,10 @@ ## 0.11.0 (TBD) +### Bug fixes + +- Fix WKB writing on big-endian systems (#497). + ### Packaging - the GDAL library included in the wheels is upgraded from 3.9.2 to 3.10.0 (#499). 
diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index a9c934e5..40304851 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -2253,7 +2253,8 @@ def ogr_write( cdef OGRGeometryH ogr_geometry_multi = NULL cdef OGRFeatureDefnH ogr_featuredef = NULL cdef OGRFieldDefnH ogr_fielddef = NULL - cdef unsigned char *wkb_buffer = NULL + cdef const unsigned char *wkb_buffer = NULL + cdef unsigned int wkbtype = 0 cdef int supports_transactions = 0 cdef int err = 0 cdef int i = 0 @@ -2373,15 +2374,18 @@ def ogr_write( # TODO: geometry must not be null or errors wkb = None if geometry is None else geometry[i] if wkb is not None: - wkbtype = bytearray(wkb)[1] - # may need to consider all 4 bytes: int.from_bytes(wkb[0][1:4], byteorder="little") - # use "little" if the first byte == 1 + wkb_buffer = wkb + if wkb_buffer[0] == 1: + # Little endian WKB type. + wkbtype = wkb_buffer[1] + (wkb_buffer[2] << 8) + (wkb_buffer[3] << 16) + (wkb_buffer[4] << 24) + else: + # Big endian WKB type. + wkbtype = ((wkb_buffer[1]) << 24) + (wkb_buffer[2] << 16) + (wkb_buffer[3] << 8) + wkb_buffer[4] ogr_geometry = OGR_G_CreateGeometry(wkbtype) if ogr_geometry == NULL: raise GeometryError(f"Could not create geometry at index {i} for WKB type {wkbtype}") from None # import the WKB - wkb_buffer = wkb err = OGR_G_ImportFromWkb(ogr_geometry, wkb_buffer, len(wkb)) if err: raise GeometryError(f"Could not create geometry from WKB at index {i}") from None From ea3d23ffd5bcad95f7f170f7f46aa8bc584a6712 Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Fri, 6 Dec 2024 01:11:41 +0100 Subject: [PATCH 22/27] ENH: capture all errors logged by gdal when opening a file fails (#495) --- CHANGES.md | 8 +- pyogrio/_compat.py | 1 + pyogrio/_err.pxd | 13 +- pyogrio/_err.pyx | 319 +++++++++++++++++++++++------ pyogrio/_geometry.pyx | 2 +- pyogrio/_io.pyx | 92 +++++---- pyogrio/_ogr.pyx | 20 +- pyogrio/tests/conftest.py | 5 + pyogrio/tests/test_geopandas_io.py | 19 +- 9 files changed, 354 insertions(+), 125 deletions(-) 
diff --git a/CHANGES.md b/CHANGES.md index 131f3359..39c0ff8f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,13 +2,17 @@ ## 0.11.0 (TBD) +### Improvements + +- Capture all errors logged by gdal when opening a file fails (#495). + ### Bug fixes - Fix WKB writing on big-endian systems (#497). ### Packaging -- the GDAL library included in the wheels is upgraded from 3.9.2 to 3.10.0 (#499). +- The GDAL library included in the wheels is upgraded from 3.9.2 to 3.10.0 (#499). ## 0.10.0 (2024-09-28) @@ -20,7 +24,7 @@ - Silence warning from `write_dataframe` with `GeoSeries.notna()` (#435). - Enable mask & bbox filter when geometry column not read (#431). -- Raise NotImplmentedError when user attempts to write to an open file handle (#442). +- Raise `NotImplementedError` when user attempts to write to an open file handle (#442). - Prevent seek on read from compressed inputs (#443). ### Packaging diff --git a/pyogrio/_compat.py b/pyogrio/_compat.py index 6c39ad90..acfea471 100644 --- a/pyogrio/_compat.py +++ b/pyogrio/_compat.py @@ -40,6 +40,7 @@ PANDAS_GE_20 = pandas is not None and Version(pandas.__version__) >= Version("2.0.0") PANDAS_GE_22 = pandas is not None and Version(pandas.__version__) >= Version("2.2.0") +GDAL_GE_352 = __gdal_version__ >= (3, 5, 2) GDAL_GE_38 = __gdal_version__ >= (3, 8, 0) HAS_GDAL_GEOS = __gdal_geos_version__ is not None diff --git a/pyogrio/_err.pxd b/pyogrio/_err.pxd index 53d52a13..49d321ea 100644 --- a/pyogrio/_err.pxd +++ b/pyogrio/_err.pxd @@ -1,4 +1,9 @@ -cdef object exc_check() -cdef int exc_wrap_int(int retval) except -1 -cdef int exc_wrap_ogrerr(int retval) except -1 -cdef void *exc_wrap_pointer(void *ptr) except NULL +cdef object check_last_error() +cdef int check_int(int retval) except -1 +cdef void *check_pointer(void *ptr) except NULL + +cdef class ErrorHandler: + cdef object error_stack + cdef int check_int(self, int retval, bint squash_errors) except -1 + cdef void *check_pointer(self, void *ptr, bint squash_errors) except NULL + 
cdef void _handle_error_stack(self, bint squash_errors) diff --git a/pyogrio/_err.pyx b/pyogrio/_err.pyx index 51f2fcfb..d280101b 100644 --- a/pyogrio/_err.pyx +++ b/pyogrio/_err.pyx @@ -1,25 +1,26 @@ -# ported from fiona::_err.pyx -from enum import IntEnum +"""Error handling code for GDAL/OGR. + +Ported from fiona::_err.pyx +""" + +import contextlib import warnings +from contextvars import ContextVar +from itertools import zip_longest from pyogrio._ogr cimport ( CE_None, CE_Debug, CE_Warning, CE_Failure, CE_Fatal, CPLErrorReset, CPLGetLastErrorType, CPLGetLastErrorNo, CPLGetLastErrorMsg, OGRErr, - CPLErr, CPLErrorHandler, CPLDefaultErrorHandler, CPLPushErrorHandler) - - -# CPL Error types as an enum. -class GDALError(IntEnum): - none = CE_None - debug = CE_Debug - warning = CE_Warning - failure = CE_Failure - fatal = CE_Fatal + OGRERR_NONE, CPLErr, CPLErrorHandler, CPLDefaultErrorHandler, + CPLPopErrorHandler, CPLPushErrorHandler) +_ERROR_STACK = ContextVar("error_stack") +_ERROR_STACK.set([]) class CPLE_BaseError(Exception): """Base CPL error class. + For internal use within Cython only. """ @@ -103,14 +104,25 @@ class CPLE_AWSSignatureDoesNotMatchError(CPLE_BaseError): pass +class CPLE_AWSError(CPLE_BaseError): + pass + + class NullPointerError(CPLE_BaseError): """ - Returned from exc_wrap_pointer when a NULL pointer is passed, but no GDAL + Returned from check_pointer when a NULL pointer is passed, but no GDAL error was raised. """ pass +class CPLError(CPLE_BaseError): + """ + Returned from check_int when an error code is returned, but no GDAL + error was set. + """ + pass + # Map of GDAL error numbers to the Python exceptions. 
exception_map = { @@ -132,95 +144,112 @@ exception_map = { 13: CPLE_AWSObjectNotFoundError, 14: CPLE_AWSAccessDeniedError, 15: CPLE_AWSInvalidCredentialsError, - 16: CPLE_AWSSignatureDoesNotMatchError + 16: CPLE_AWSSignatureDoesNotMatchError, + 17: CPLE_AWSError } -cdef inline object exc_check(): - """Checks GDAL error stack for fatal or non-fatal errors +cdef inline object check_last_error(): + """Checks if the last GDAL error was a fatal or non-fatal error. + + When a non-fatal error is found, an appropriate exception is raised. + + When a fatal error is found, SystemExit is called. + Returns ------- An Exception, SystemExit, or None """ - cdef const char *msg_c = NULL - err_type = CPLGetLastErrorType() err_no = CPLGetLastErrorNo() - err_msg = CPLGetLastErrorMsg() + err_msg = clean_error_message(CPLGetLastErrorMsg()) + if err_msg == "": + err_msg = "No error message." + + if err_type == CE_Failure: + CPLErrorReset() + return exception_map.get( + err_no, CPLE_BaseError)(err_type, err_no, err_msg) + + if err_type == CE_Fatal: + return SystemExit("Fatal error: {0}".format((err_type, err_no, err_msg))) - if err_msg == NULL: - msg = "No error message." - else: - # Reformat messages. - msg_b = err_msg +cdef clean_error_message(const char* err_msg): + """Cleans up error messages from GDAL. + + Parameters + ---------- + err_msg : const char* + The error message to clean up. + + Returns + ------- + str + The cleaned up error message or empty string + """ + if err_msg != NULL: + # Reformat message. + msg_b = err_msg try: - msg = msg_b.decode('utf-8') + msg = msg_b.decode("utf-8") msg = msg.replace("`", "'") msg = msg.replace("\n", " ") except UnicodeDecodeError as exc: - msg = f"Could not decode error message to UTF-8. Raw error: {msg_b}" + msg = f"Could not decode error message to UTF-8. 
Raw error: {msg_b}" - if err_type == 3: - CPLErrorReset() - return exception_map.get( - err_no, CPLE_BaseError)(err_type, err_no, msg) + else: + msg = "" - if err_type == 4: - return SystemExit("Fatal error: {0}".format((err_type, err_no, msg))) + return msg - else: - return +cdef void *check_pointer(void *ptr) except NULL: + """Check the pointer returned by a GDAL/OGR function. -cdef void *exc_wrap_pointer(void *ptr) except NULL: - """Wrap a GDAL/OGR function that returns GDALDatasetH etc (void *) - Raises an exception if a non-fatal error has be set or if pointer is NULL. + If `ptr` is `NULL`, an exception inheriting from CPLE_BaseError is raised. + When the last error registered by GDAL/OGR was a non-fatal error, the + exception raised will be customized appropriately. Otherwise a + NullPointerError is raised. """ if ptr == NULL: - exc = exc_check() + exc = check_last_error() if exc: raise exc else: # null pointer was passed, but no error message from GDAL raise NullPointerError(-1, -1, "NULL pointer error") + return ptr -cdef int exc_wrap_int(int err) except -1: - """Wrap a GDAL/OGR function that returns CPLErr or OGRErr (int) - Raises an exception if a non-fatal error has be set. +cdef int check_int(int err) except -1: + """Check the CPLErr (int) value returned by a GDAL/OGR function. - Copied from Fiona (_err.pyx). + If `err` is not OGRERR_NONE, an exception inheriting from CPLE_BaseError is raised. + When the last error registered by GDAL/OGR was a non-fatal error, the + exception raised will be customized appropriately. Otherwise a CPLError is + raised. """ - if err: - exc = exc_check() + if err != OGRERR_NONE: + exc = check_last_error() if exc: raise exc else: # no error message from GDAL - raise CPLE_BaseError(-1, -1, "Unspecified OGR / GDAL error") - return err - - -cdef int exc_wrap_ogrerr(int err) except -1: - """Wrap a function that returns OGRErr (int) but does not use the - CPL error stack. - - Adapted from Fiona (_err.pyx). 
- """ - if err != 0: - raise CPLE_BaseError(3, err, f"OGR Error code {err}") + raise CPLError(-1, -1, "Unspecified OGR / GDAL error") return err -cdef void error_handler(CPLErr err_class, int err_no, const char* err_msg) nogil: +cdef void error_handler(CPLErr err_class, int err_no, const char* err_msg) noexcept nogil: """Custom CPL error handler to match the Python behaviour. - Generally we want to suppress error printing to stderr (behaviour of the - default GDAL error handler) because we already raise a Python exception - that includes the error message. + For non-fatal errors (CE_Failure), error printing to stderr (behaviour of + the default GDAL error handler) is suppressed, because we already raise a + Python exception that includes the error message. + + Warnings are converted to Python warnings. """ if err_class == CE_Fatal: # If the error class is CE_Fatal, we want to have a message issued @@ -229,16 +258,14 @@ cdef void error_handler(CPLErr err_class, int err_no, const char* err_msg) nogil CPLDefaultErrorHandler(err_class, err_no, err_msg) return - elif err_class == CE_Failure: + if err_class == CE_Failure: # For Failures, do nothing as those are explicitly caught # with error return codes and translated into Python exceptions return - elif err_class == CE_Warning: + if err_class == CE_Warning: with gil: - msg_b = err_msg - msg = msg_b.decode('utf-8') - warnings.warn(msg, RuntimeWarning) + warnings.warn(clean_error_message(err_msg), RuntimeWarning) return # Fall back to the default handler for non-failure messages since @@ -248,3 +275,165 @@ cdef void error_handler(CPLErr err_class, int err_no, const char* err_msg) nogil def _register_error_handler(): CPLPushErrorHandler(error_handler) + + +cdef class ErrorHandler: + + def __init__(self, error_stack=None): + self.error_stack = error_stack or {} + + cdef int check_int(self, int err, bint squash_errors) except -1: + """Check the CPLErr (int) value returned by a GDAL/OGR function. 
+ + If `err` is not OGRERR_NONE, an exception inheriting from CPLE_BaseError is + raised. + When a non-fatal GDAL/OGR error was captured in the error stack, the + exception raised will be customized appropriately. Otherwise, a + CPLError is raised. + + Parameters + ---------- + err : int + The CPLErr returned by a GDAL/OGR function. + squash_errors : bool + True to squash all errors captured to one error with the exception type of + the last error and all error messages concatenated. + + Returns + ------- + int + The `err` input parameter if it is OGRERR_NONE. Otherwise an exception is + raised. + + """ + if err != OGRERR_NONE: + if self.error_stack.get(): + self._handle_error_stack(squash_errors) + else: + raise CPLError(CE_Failure, err, "Unspecified OGR / GDAL error") + + return err + + cdef void *check_pointer(self, void *ptr, bint squash_errors) except NULL: + """Check the pointer returned by a GDAL/OGR function. + + If `ptr` is `NULL`, an exception inheriting from CPLE_BaseError is + raised. + When a non-fatal GDAL/OGR error was captured in the error stack, the + exception raised will be customized appropriately. Otherwise, a + NullPointerError is raised. + + Parameters + ---------- + ptr : pointer + The pointer returned by a GDAL/OGR function. + squash_errors : bool + True to squash all errors captured to one error with the exception type of + the last error and all error messages concatenated. + + Returns + ------- + pointer + The `ptr` input parameter if it is not `NULL`. Otherwise an exception is + raised. 
+ + """ + if ptr == NULL: + if self.error_stack.get(): + self._handle_error_stack(squash_errors) + else: + raise NullPointerError(-1, -1, "NULL pointer error") + + return ptr + + cdef void _handle_error_stack(self, bint squash_errors): + """Handle the errors in `error_stack`.""" + stack = self.error_stack.get() + for error, cause in zip_longest(stack[::-1], stack[::-1][1:]): + if error is not None and cause is not None: + error.__cause__ = cause + + last = stack.pop() + if last is not None: + if squash_errors: + # Concatenate all error messages, and raise a single exception + errmsg = str(last) + inner = last.__cause__ + while inner is not None: + errmsg = f"{errmsg}; {inner}" + inner = inner.__cause__ + + if errmsg == "": + errmsg = "No error message." + + raise type(last)(-1, -1, errmsg) + + raise last + + +cdef void stacking_error_handler( + CPLErr err_class, + int err_no, + const char* err_msg +) noexcept nogil: + """Custom CPL error handler that adds non-fatal errors to a stack. + + All non-fatal errors (CE_Failure) are not printed to stderr (behaviour + of the default GDAL error handler), but they are converted to python + exceptions and added to a stack, so they can be dealt with afterwards. + + Warnings are converted to Python warnings. 
+ """ + if err_class == CE_Fatal: + # If the error class is CE_Fatal, we want to have a message issued + # because the CPL support code does an abort() before any exception + # can be generated + CPLDefaultErrorHandler(err_class, err_no, err_msg) + return + + if err_class == CE_Failure: + # For Failures, add them to the error exception stack + with gil: + stack = _ERROR_STACK.get() + stack.append( + exception_map.get(err_no, CPLE_BaseError)( + err_class, err_no, clean_error_message(err_msg) + ), + ) + _ERROR_STACK.set(stack) + + return + + if err_class == CE_Warning: + with gil: + warnings.warn(clean_error_message(err_msg), RuntimeWarning) + return + + # Fall back to the default handler for non-failure messages since + # they won't be translated into exceptions. + CPLDefaultErrorHandler(err_class, err_no, err_msg) + + +@contextlib.contextmanager +def capture_errors(): + """A context manager that captures all GDAL non-fatal errors occurring. + + It adds all errors to a single stack, so it assumes that no more than one + GDAL function is called. + + Yields an ErrorHandler object that can be used to handle the errors + if any were captured. + """ + CPLErrorReset() + _ERROR_STACK.set([]) + + # stacking_error_handler records GDAL errors in the order they occur and + # converts them to exceptions. + CPLPushErrorHandler(stacking_error_handler) + + # Run code in the `with` block. 
+ yield ErrorHandler(_ERROR_STACK) + + CPLPopErrorHandler() + _ERROR_STACK.set([]) + CPLErrorReset() diff --git a/pyogrio/_geometry.pyx b/pyogrio/_geometry.pyx index b41a6640..95706df4 100644 --- a/pyogrio/_geometry.pyx +++ b/pyogrio/_geometry.pyx @@ -84,7 +84,7 @@ cdef str get_geometry_type(void *ogr_layer): cdef OGRwkbGeometryType ogr_type try: - ogr_featuredef = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer)) + ogr_featuredef = check_pointer(OGR_L_GetLayerDefn(ogr_layer)) except NullPointerError: raise DataLayerError("Could not get layer definition") diff --git a/pyogrio/_io.pyx b/pyogrio/_io.pyx index 40304851..55245799 100644 --- a/pyogrio/_io.pyx +++ b/pyogrio/_io.pyx @@ -3,7 +3,6 @@ """IO support for OGR vector data sources """ - import contextlib import datetime import locale @@ -26,9 +25,18 @@ from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer import numpy as np from pyogrio._ogr cimport * -from pyogrio._err cimport * +from pyogrio._err cimport ( + check_last_error, check_int, check_pointer, ErrorHandler +) from pyogrio._vsi cimport * -from pyogrio._err import CPLE_BaseError, CPLE_NotSupportedError, NullPointerError +from pyogrio._err import ( + CPLE_AppDefinedError, + CPLE_BaseError, + CPLE_NotSupportedError, + CPLE_OpenFailedError, + NullPointerError, + capture_errors, +) from pyogrio._geometry cimport get_geometry_type, get_geometry_type_code from pyogrio.errors import CRSError, DataSourceError, DataLayerError, GeometryError, FieldError, FeatureError @@ -185,7 +193,8 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL: options : char **, optional dataset open options """ - cdef void* ogr_dataset = NULL + cdef void *ogr_dataset = NULL + cdef ErrorHandler errors # Force linear approximations in all cases OGRSetNonLinearGeometriesEnabledFlag(0) @@ -196,27 +205,27 @@ cdef void* ogr_open(const char* path_c, int mode, char** options) except NULL: else: flags |= GDAL_OF_READONLY - try: # WARNING: GDAL logs warnings 
about invalid open options to stderr # instead of raising an error - ogr_dataset = exc_wrap_pointer( - GDALOpenEx(path_c, flags, NULL, options, NULL) - ) - - return ogr_dataset + with capture_errors() as errors: + ogr_dataset = GDALOpenEx(path_c, flags, NULL, options, NULL) + return errors.check_pointer(ogr_dataset, True) except NullPointerError: raise DataSourceError( - "Failed to open dataset (mode={}): {}".format(mode, path_c.decode("utf-8")) + f"Failed to open dataset ({mode=}): {path_c.decode('utf-8')}" ) from None except CPLE_BaseError as exc: - if str(exc).endswith("a supported file format."): + if " a supported file format." in str(exc): + # In gdal 3.9, this error message was slightly changed, so we can only check + # on this part of the error message. raise DataSourceError( - f"{str(exc)} It might help to specify the correct driver explicitly by " + f"{str(exc)}; It might help to specify the correct driver explicitly by " "prefixing the file path with ':', e.g. 'CSV:path'." ) from None + raise DataSourceError(str(exc)) from None @@ -227,7 +236,7 @@ cdef ogr_close(GDALDatasetH ogr_dataset): if ogr_dataset != NULL: IF CTE_GDAL_VERSION >= (3, 7, 0): if GDALClose(ogr_dataset) != CE_None: - return exc_check() + return check_last_error() return @@ -236,7 +245,7 @@ cdef ogr_close(GDALDatasetH ogr_dataset): # GDAL will set an error if there was an error writing the data source # on close - return exc_check() + return check_last_error() cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: @@ -258,10 +267,10 @@ cdef OGRLayerH get_ogr_layer(GDALDatasetH ogr_dataset, layer) except NULL: if isinstance(layer, str): name_b = layer.encode('utf-8') name_c = name_b - ogr_layer = exc_wrap_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c)) + ogr_layer = check_pointer(GDALDatasetGetLayerByName(ogr_dataset, name_c)) elif isinstance(layer, int): - ogr_layer = exc_wrap_pointer(GDALDatasetGetLayer(ogr_dataset, layer)) + ogr_layer = 
check_pointer(GDALDatasetGetLayer(ogr_dataset, layer)) # GDAL does not always raise exception messages in this case except NullPointerError: @@ -304,11 +313,11 @@ cdef OGRLayerH execute_sql(GDALDatasetH ogr_dataset, str sql, str sql_dialect=No sql_b = sql.encode('utf-8') sql_c = sql_b if sql_dialect is None: - return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)) + return check_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, NULL)) sql_dialect_b = sql_dialect.encode('utf-8') sql_dialect_c = sql_dialect_b - return exc_wrap_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c)) + return check_pointer(GDALDatasetExecuteSQL(ogr_dataset, sql_c, NULL, sql_dialect_c)) # GDAL does not always raise exception messages in this case except NullPointerError: @@ -336,7 +345,7 @@ cdef str get_crs(OGRLayerH ogr_layer): cdef char *ogr_wkt = NULL try: - ogr_crs = exc_wrap_pointer(OGR_L_GetSpatialRef(ogr_layer)) + ogr_crs = check_pointer(OGR_L_GetSpatialRef(ogr_layer)) except NullPointerError: # No coordinate system defined. 
@@ -383,7 +392,7 @@ cdef get_driver(OGRDataSourceH ogr_dataset): cdef void *ogr_driver try: - ogr_driver = exc_wrap_pointer(GDALGetDatasetDriver(ogr_dataset)) + ogr_driver = check_pointer(GDALGetDatasetDriver(ogr_dataset)) except NullPointerError: raise DataLayerError(f"Could not detect driver of dataset") from None @@ -426,7 +435,7 @@ cdef get_feature_count(OGRLayerH ogr_layer, int force): feature_count = 0 while True: try: - ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer)) + ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) feature_count +=1 except NullPointerError: @@ -469,13 +478,12 @@ cdef get_total_bounds(OGRLayerH ogr_layer, int force): """ cdef OGREnvelope ogr_envelope - try: - exc_wrap_ogrerr(OGR_L_GetExtent(ogr_layer, &ogr_envelope, force)) + + if OGR_L_GetExtent(ogr_layer, &ogr_envelope, force) == OGRERR_NONE: bounds = ( ogr_envelope.MinX, ogr_envelope.MinY, ogr_envelope.MaxX, ogr_envelope.MaxY ) - - except CPLE_BaseError: + else: bounds = None return bounds @@ -621,7 +629,7 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False): cdef const char *key_c try: - ogr_featuredef = exc_wrap_pointer(OGR_L_GetLayerDefn(ogr_layer)) + ogr_featuredef = check_pointer(OGR_L_GetLayerDefn(ogr_layer)) except NullPointerError: raise DataLayerError("Could not get layer definition") from None @@ -638,7 +646,7 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding, use_arrow=False): for i in range(field_count): try: - ogr_fielddef = exc_wrap_pointer(OGR_FD_GetFieldDefn(ogr_featuredef, i)) + ogr_fielddef = check_pointer(OGR_FD_GetFieldDefn(ogr_featuredef, i)) except NullPointerError: raise FieldError(f"Could not get field definition for field at index {i}") from None @@ -700,7 +708,7 @@ cdef apply_where_filter(OGRLayerH ogr_layer, str where): # logs to stderr if err != OGRERR_NONE: try: - exc_check() + check_last_error() except CPLE_BaseError as exc: raise ValueError(str(exc)) @@ -973,7 +981,7 @@ cdef get_features( break try: - 
ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer)) + ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) except NullPointerError: # No more rows available, so stop reading @@ -1067,7 +1075,7 @@ cdef get_features_by_fid( fid = fids[i] try: - ogr_feature = exc_wrap_pointer(OGR_L_GetFeature(ogr_layer, fid)) + ogr_feature = check_pointer(OGR_L_GetFeature(ogr_layer, fid)) except NullPointerError: raise FeatureError(f"Could not read feature with fid {fid}") from None @@ -1122,7 +1130,7 @@ cdef get_bounds( break try: - ogr_feature = exc_wrap_pointer(OGR_L_GetNextFeature(ogr_layer)) + ogr_feature = check_pointer(OGR_L_GetNextFeature(ogr_layer)) except NullPointerError: # No more rows available, so stop reading @@ -1924,7 +1932,7 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) # Get the driver try: - ogr_driver = exc_wrap_pointer(GDALGetDriverByName(driver_c)) + ogr_driver = check_pointer(GDALGetDriverByName(driver_c)) except NullPointerError: raise DataSourceError(f"Could not obtain driver: {driver_c.decode('utf-8')} (check that it was installed correctly into GDAL)") @@ -1944,7 +1952,7 @@ cdef void * ogr_create(const char* path_c, const char* driver_c, char** options) # Create the dataset try: - ogr_dataset = exc_wrap_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options)) + ogr_dataset = check_pointer(GDALCreate(ogr_driver, path_c, 0, 0, 0, GDT_Unknown, options)) except NullPointerError: raise DataSourceError(f"Failed to create dataset with driver: {path_c.decode('utf-8')} {driver_c.decode('utf-8')}") from None @@ -1966,7 +1974,7 @@ cdef void * create_crs(str crs) except NULL: crs_c = crs_b try: - ogr_crs = exc_wrap_pointer(OSRNewSpatialReference(NULL)) + ogr_crs = check_pointer(OSRNewSpatialReference(NULL)) err = OSRSetFromUserInput(ogr_crs, crs_c) if err: raise CRSError("Could not set CRS: {}".format(crs_c.decode('UTF-8'))) from None @@ -2190,12 +2198,12 @@ cdef create_ogr_dataset_layer( layer_b = 
layer.encode('UTF-8') layer_c = layer_b - ogr_layer = exc_wrap_pointer( + ogr_layer = check_pointer( GDALDatasetCreateLayer(ogr_dataset, layer_c, ogr_crs, geometry_code, layer_options)) else: - ogr_layer = exc_wrap_pointer(get_ogr_layer(ogr_dataset, layer)) + ogr_layer = check_pointer(get_ogr_layer(ogr_dataset, layer)) # Set dataset and layer metadata set_metadata(ogr_dataset, dataset_metadata) @@ -2335,7 +2343,7 @@ def ogr_write( name_b = fields[i].encode(encoding) try: - ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type)) + ogr_fielddef = check_pointer(OGR_Fld_Create(name_b, field_type)) # subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html if field_subtype != OFSTNone: @@ -2346,7 +2354,7 @@ def ogr_write( # TODO: set precision - exc_wrap_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1)) + check_int(OGR_L_CreateField(ogr_layer, ogr_fielddef, 1)) except: raise FieldError(f"Error adding field '{fields[i]}' to layer") from None @@ -2493,7 +2501,7 @@ def ogr_write( # Add feature to the layer try: - exc_wrap_int(OGR_L_CreateFeature(ogr_layer, ogr_feature)) + check_int(OGR_L_CreateFeature(ogr_layer, ogr_feature)) except CPLE_BaseError as exc: raise FeatureError(f"Could not add feature to layer at index {i}: {exc}") from None @@ -2619,7 +2627,7 @@ def ogr_write_arrow( break if not OGR_L_WriteArrowBatch(ogr_layer, &schema, &array, options): - exc = exc_check() + exc = check_last_error() gdal_msg = f": {str(exc)}" if exc else "." 
raise DataLayerError( f"Error while writing batch to OGR layer{gdal_msg}" @@ -2746,7 +2754,7 @@ cdef create_fields_from_arrow_schema( continue if not OGR_L_CreateFieldFromArrowSchema(destLayer, child, options): - exc = exc_check() + exc = check_last_error() gdal_msg = f" ({str(exc)})" if exc else "" raise FieldError( f"Error while creating field from Arrow for field {i} with name " diff --git a/pyogrio/_ogr.pyx b/pyogrio/_ogr.pyx index 1c2d5f64..ea02678e 100644 --- a/pyogrio/_ogr.pyx +++ b/pyogrio/_ogr.pyx @@ -3,7 +3,7 @@ import sys from uuid import uuid4 import warnings -from pyogrio._err cimport exc_wrap_int, exc_wrap_ogrerr, exc_wrap_pointer +from pyogrio._err cimport check_int, check_pointer from pyogrio._err import CPLE_BaseError, NullPointerError from pyogrio.errors import DataSourceError @@ -182,15 +182,15 @@ def has_proj_data(): """ cdef OGRSpatialReferenceH srs = OSRNewSpatialReference(NULL) - try: - exc_wrap_ogrerr(exc_wrap_int(OSRImportFromEPSG(srs, 4326))) - except CPLE_BaseError: - return False - else: + retval = OSRImportFromEPSG(srs, 4326) + if srs != NULL: + OSRRelease(srs) + + if retval == OGRERR_NONE: + # Successful return, so PROJ data files are correctly found return True - finally: - if srs != NULL: - OSRRelease(srs) + else: + return False def init_gdal_data(): @@ -282,7 +282,7 @@ def _get_driver_metadata_item(driver, metadata_item): cdef void *cogr_driver = NULL try: - cogr_driver = exc_wrap_pointer(GDALGetDriverByName(driver.encode('UTF-8'))) + cogr_driver = check_pointer(GDALGetDriverByName(driver.encode('UTF-8'))) except NullPointerError: raise DataSourceError( f"Could not obtain driver: {driver} (check that it was installed " diff --git a/pyogrio/tests/conftest.py b/pyogrio/tests/conftest.py index d6bea86b..ebc4e18d 100644 --- a/pyogrio/tests/conftest.py +++ b/pyogrio/tests/conftest.py @@ -124,6 +124,11 @@ def naturalearth_lowres_all_ext(tmp_path, naturalearth_lowres, request): return prepare_testfile(naturalearth_lowres, tmp_path, 
request.param) +@pytest.fixture(scope="function", params=[".geojson"]) +def naturalearth_lowres_geojson(tmp_path, naturalearth_lowres, request): + return prepare_testfile(naturalearth_lowres, tmp_path, request.param) + + @pytest.fixture(scope="function") def naturalearth_lowres_vsi(tmp_path, naturalearth_lowres): """Wrap naturalearth_lowres as a zip file for VSI tests""" diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index 112eef84..96d9e3a0 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -12,10 +12,11 @@ list_drivers, list_layers, read_info, + set_gdal_config_options, vsi_listtree, vsi_unlink, ) -from pyogrio._compat import HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15 +from pyogrio._compat import GDAL_GE_352, HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15 from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe from pyogrio.raw import ( @@ -227,6 +228,22 @@ def test_read_force_2d(tmp_path, use_arrow): assert not df.iloc[0].geometry.has_z +@pytest.mark.skipif( + not GDAL_GE_352, + reason="gdal >= 3.5.2 needed to use OGR_GEOJSON_MAX_OBJ_SIZE with a float value", +) +def test_read_geojson_error(naturalearth_lowres_geojson, use_arrow): + try: + set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": 0.01}) + with pytest.raises( + DataSourceError, + match="Failed to read GeoJSON data; .* GeoJSON object too complex", + ): + read_dataframe(naturalearth_lowres_geojson, use_arrow=use_arrow) + finally: + set_gdal_config_options({"OGR_GEOJSON_MAX_OBJ_SIZE": None}) + + def test_read_layer(tmp_path, use_arrow): filename = tmp_path / "test.gpkg" From 34321b8bd5ac2dbbe0ec5c9c42d2ee7d00eaf5ad Mon Sep 17 00:00:00 2001 From: Brendan Ward Date: Thu, 5 Dec 2024 23:34:22 -0800 Subject: [PATCH 23/27] CI: Upgrade to MacOS 13 runner (#505) Upgrade to MacOS 13 runner --- .github/workflows/release.yml | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1e367d66..cc81a14a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -152,13 +152,13 @@ jobs: fail-fast: false matrix: include: - - os: "macos-12" + - os: "macos-13" triplet: "x64-osx-dynamic-release" arch: x86_64 vcpkg_cache: "/Users/runner/.cache/vcpkg/archives" vcpkg_logs: "/usr/local/share/vcpkg/buildtrees/**/*.log" - - os: "macos-12" + - os: "macos-13" triplet: "arm64-osx-dynamic-release" arch: arm64 vcpkg_cache: "/Users/runner/.cache/vcpkg/archives" @@ -244,7 +244,7 @@ jobs: "ubuntu-latest", "ubuntu-20.04", "windows-latest", - "macos-12", + "macos-13", "macos-latest", ] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] @@ -257,7 +257,7 @@ jobs: artifact: pyogrio-wheel-linux-manylinux_2_28_x86_64 - os: "windows-latest" artifact: pyogrio-wheel-x64-windows-dynamic-release - - os: "macos-12" + - os: "macos-13" artifact: pyogrio-wheel-x64-osx-dynamic-release - os: "macos-latest" artifact: pyogrio-wheel-arm64-osx-dynamic-release From e96c532234f1d6c8e66c0dc6a85c9baf0d4d5fa9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 6 Dec 2024 19:11:24 +0100 Subject: [PATCH 24/27] BLD/RLS: ensure wheels are including GDAL built with openssl (#506) --- ci/vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/vcpkg.json b/ci/vcpkg.json index 0919a80f..1e0196c6 100644 --- a/ci/vcpkg.json +++ b/ci/vcpkg.json @@ -5,7 +5,7 @@ { "name": "gdal", "default-features": false, - "features": ["recommended-features", "curl", "geos", "iconv"] + "features": ["recommended-features", "curl", "geos", "iconv", "openssl"] } ], "builtin-baseline": "0857a4b08c14030bbe41e80accb2b1fddb047a74" From ed39ad75618a3b2ca97e46b2818cad8b306eb8bc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 8 Dec 2024 19:28:49 +0100 Subject: [PATCH 25/27] LINT: indicate numpy docstrings for ruff 
pydocstyle rules (#507) STYLE: indicate numpy docstrings for ruff pydocstyle rules --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index ed0471bc..c448ac80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -206,3 +206,6 @@ section-order = [ "geopandas.tests", "geopandas.testing", ] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" From 05143003b14e1a6cebc06d8126660dcf63857f62 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 09:07:14 -0800 Subject: [PATCH 26/27] Bump astral-sh/setup-uv from 4 to 5 (#514) --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cc81a14a..fe36ba9f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -273,7 +273,7 @@ jobs: allow-prereleases: true - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 - name: Create virtual environment (Linux / MacOS) # use uv to create a virtual environment, then add it to environment From 07416f4adf8854857a920351a1530301f432f22a Mon Sep 17 00:00:00 2001 From: Pieter Roggemans Date: Mon, 23 Dec 2024 18:07:58 +0100 Subject: [PATCH 27/27] CI: use pyarrow-core as dependency in tests (#509) --- ci/envs/latest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/envs/latest.yml b/ci/envs/latest.yml index c75ab4b7..ff8ad623 100644 --- a/ci/envs/latest.yml +++ b/ci/envs/latest.yml @@ -7,4 +7,4 @@ dependencies: - pytest - shapely>=2 - geopandas-base - - pyarrow + - pyarrow-core