Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Schema calculator from array #5

Merged
merged 6 commits into from
Dec 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ set(GEOARROW_GEOS_VERSION_PATCH "${GEOARROW_GEOS_VERSION_PATCH}")

# Tests are not built unless this option is switched ON by the caller.
option(GEOARROW_GEOS_BUILD_TESTS "Build tests" OFF)

# Ensure geoarrow_c
# Ensure geoarrow_c with namespace
# NOTE(review): GEOARROW_NAMESPACE is presumably read by the geoarrow_c build
# to prefix its symbols; confirm against the geoarrow_c CMakeLists.
set(GEOARROW_NAMESPACE GeoArrowGEOS)
# The dependency is pinned to an exact commit archive and verified with
# URL_HASH, so the URL and the SHA256 must always be updated together.
FetchContent_Declare(
geoarrow_c
URL https://github.com/geoarrow/geoarrow-c/archive/898af28230cf11c4804dae64090993c8c3173bd9.zip
URL https://github.com/geoarrow/geoarrow-c/archive/22794ce83fae1e2e99511508fa936c1e4cb115cb.zip
URL_HASH
SHA256=bbac8132f64a59db88983347a0337e8a37a8d50f35af248075edc59ce934547b)
SHA256=3631aa2c0883a76d18bfa56395f780f8ff6eec49b2fad21c24f72bb3e15ceed7)

FetchContent_MakeAvailable(geoarrow_c)

Expand Down
275 changes: 241 additions & 34 deletions src/geoarrow_geos/geoarrow_geos.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,40 +61,6 @@ GeoArrowGEOSErrorCode GeoArrowGEOSArrayBuilderCreate(
return GEOARROW_OK;
}

GeoArrowGEOSErrorCode GeoArrowGEOSMakeSchema(int32_t encoding, int32_t wkb_type,
struct ArrowSchema* out) {
enum GeoArrowType type = GEOARROW_TYPE_UNINITIALIZED;
enum GeoArrowGeometryType geometry_type = GEOARROW_GEOMETRY_TYPE_GEOMETRY;
enum GeoArrowDimensions dimensions = GEOARROW_DIMENSIONS_UNKNOWN;
enum GeoArrowCoordType coord_type = GEOARROW_COORD_TYPE_UNKNOWN;

switch (encoding) {
case GEOARROW_GEOS_ENCODING_WKT:
type = GEOARROW_TYPE_WKT;
break;
case GEOARROW_GEOS_ENCODING_WKB:
type = GEOARROW_TYPE_WKB;
break;
case GEOARROW_GEOS_ENCODING_GEOARROW:
coord_type = GEOARROW_COORD_TYPE_SEPARATE;
break;
case GEOARROW_GEOS_ENCODING_GEOARROW_INTERLEAVED:
coord_type = GEOARROW_COORD_TYPE_INTERLEAVED;
break;
default:
return EINVAL;
}

if (type == GEOARROW_TYPE_UNINITIALIZED) {
geometry_type = wkb_type % 1000;
dimensions = wkb_type / 1000 + 1;
type = GeoArrowMakeType(geometry_type, dimensions, GEOARROW_COORD_TYPE_SEPARATE);
}

GEOARROW_RETURN_NOT_OK(GeoArrowSchemaInitExtension(out, type));
return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowGEOSArrayBuilderEnsureCoords(
struct GeoArrowGEOSArrayBuilder* builder, uint32_t n_coords, int n_dims) {
int64_t n_required = n_coords * n_dims;
Expand Down Expand Up @@ -891,3 +857,244 @@ void GeoArrowGEOSArrayReaderDestroy(struct GeoArrowGEOSArrayReader* reader) {

free(reader);
}

// Running state used to infer one output schema from many geometry type codes.
struct GeoArrowGEOSSchemaCalculator {
  // Merged geometry type of everything ingested so far. Create() sets it to -1,
  // meaning that no non-zero type code has been ingested yet.
  int geometry_type;

  // Merged dimensions of everything ingested so far. Create() sets it to
  // GEOARROW_DIMENSIONS_UNKNOWN.
  int dimensions;
};

// Allocate a schema calculator in its initial "nothing ingested" state.
//
// out: receives the new calculator, or NULL when allocation fails.
//
// Returns GEOARROW_OK on success and ENOMEM when malloc() fails. The result
// is released with GeoArrowGEOSSchemaCalculatorDestroy().
GeoArrowGEOSErrorCode GeoArrowGEOSSchemaCalculatorCreate(
    struct GeoArrowGEOSSchemaCalculator** out) {
  struct GeoArrowGEOSSchemaCalculator* state = malloc(sizeof *state);

  // Publishing the pointer first also covers the failure path (*out = NULL).
  *out = state;
  if (state == NULL) {
    return ENOMEM;
  }

  // -1 / UNKNOWN are the "no value yet" markers the merge helpers start from.
  state->dimensions = GEOARROW_DIMENSIONS_UNKNOWN;
  state->geometry_type = -1;
  return GEOARROW_OK;
}

// Merge an accumulated geometry type `x` with a newly observed one `y`.
//
// Both arguments are GEOARROW_GEOMETRY_TYPE_* codes, or -1 for "no value".
// Rules:
//   - x == -1: the result is y.
//   - x is GEOMETRY: the result stays GEOMETRY.
//   - single type + same single type or its multi type: the result is y.
//   - multi type + same multi type or its single type: the result is x.
//   - GEOMETRYCOLLECTION + GEOMETRYCOLLECTION: GEOMETRYCOLLECTION.
//   - y == -1 with any of the recognized x above: the result is x.
//   - anything else: GEOARROW_GEOMETRY_TYPE_GEOMETRY (mixed).
//
// All comparisons use the GEOARROW_GEOMETRY_TYPE_* enumerators; the values
// being compared are geometry-type codes, not enum GeoArrowType values.
static int GeometryType2(int x, int y) {
  switch (x) {
    case -1:
      return y;
    case GEOARROW_GEOMETRY_TYPE_GEOMETRY:
      return x;
    case GEOARROW_GEOMETRY_TYPE_POINT:
      switch (y) {
        case -1:
          return x;
        case GEOARROW_GEOMETRY_TYPE_POINT:
        case GEOARROW_GEOMETRY_TYPE_MULTIPOINT:
          return y;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_LINESTRING:
      switch (y) {
        case -1:
          return x;
        case GEOARROW_GEOMETRY_TYPE_LINESTRING:
        case GEOARROW_GEOMETRY_TYPE_MULTILINESTRING:
          return y;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_POLYGON:
      switch (y) {
        case -1:
          return x;
        case GEOARROW_GEOMETRY_TYPE_POLYGON:
        case GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON:
          return y;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_MULTIPOINT:
      switch (y) {
        case -1:
        case GEOARROW_GEOMETRY_TYPE_POINT:
        case GEOARROW_GEOMETRY_TYPE_MULTIPOINT:
          return x;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_MULTILINESTRING:
      switch (y) {
        case -1:
        case GEOARROW_GEOMETRY_TYPE_LINESTRING:
        case GEOARROW_GEOMETRY_TYPE_MULTILINESTRING:
          return x;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON:
      switch (y) {
        case -1:
        case GEOARROW_GEOMETRY_TYPE_POLYGON:
        case GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON:
          return x;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    case GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION:
      switch (y) {
        case -1:
        case GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION:
          return x;
        default:
          return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
      }
    default:
      return GEOARROW_GEOMETRY_TYPE_GEOMETRY;
  }
}

// Merge accumulated dimensions `x` with newly observed dimensions `y`.
//
// Both arguments are GEOARROW_DIMENSIONS_* codes. The result keeps every
// dimension seen on either side:
//   - UNKNOWN on one side yields the other side (when x is XY, XYZ or XYM).
//   - XY merged with anything yields the other side.
//   - XYZ merged with XYM (in either order) yields XYZM.
//   - Any x other than UNKNOWN, XY, XYZ or XYM yields XYZM.
//
// Merging XYZ or XYM with XY keeps the accumulated Z/M dimension regardless
// of argument order, so the result does not depend on the order in which
// geometries are ingested.
static int Dimensions2(int x, int y) {
  switch (x) {
    case GEOARROW_DIMENSIONS_UNKNOWN:
      return y;
    case GEOARROW_DIMENSIONS_XY:
      switch (y) {
        case GEOARROW_DIMENSIONS_UNKNOWN:
          return x;
        default:
          return y;
      }
    case GEOARROW_DIMENSIONS_XYZ:
      switch (y) {
        case GEOARROW_DIMENSIONS_UNKNOWN:
        case GEOARROW_DIMENSIONS_XY:
          // y adds nothing; never drop the Z that was already observed.
          return x;
        case GEOARROW_DIMENSIONS_XYM:
          return GEOARROW_DIMENSIONS_XYZM;
        default:
          return y;
      }
    case GEOARROW_DIMENSIONS_XYM:
      switch (y) {
        case GEOARROW_DIMENSIONS_UNKNOWN:
        case GEOARROW_DIMENSIONS_XY:
          // y adds nothing; never drop the M that was already observed.
          return x;
        case GEOARROW_DIMENSIONS_XYZ:
          return GEOARROW_DIMENSIONS_XYZM;
        default:
          return y;
      }
    default:
      return GEOARROW_DIMENSIONS_XYZM;
  }
}

// Fold `n` type codes into the calculator's running geometry type and
// dimensions.
//
// calc: calculator to update.
// wkb_type: array of `n` type codes. A code of 0 is skipped and leaves the
//   state untouched.
// n: number of elements in `wkb_type`.
//
// For every other code, `code % 1000` is merged into the geometry type and
// `code / 1000` into the dimensions.
//
// NOTE(review): `code / 1000` is passed as the dimensions value without the
// `+ 1` that GeoArrowGEOSMakeSchema() applies to its wkb_type; confirm the
// expected numbering with the producer of these codes before changing either.
void GeoArrowGEOSSchemaCalculatorIngest(struct GeoArrowGEOSSchemaCalculator* calc,
                                        const int32_t* wkb_type, size_t n) {
  size_t pos = 0;
  while (pos < n) {
    const int32_t code = wkb_type[pos++];
    if (code != 0) {
      calc->geometry_type = GeometryType2(calc->geometry_type, code % 1000);
      calc->dimensions = Dimensions2(calc->dimensions, code / 1000);
    }
  }
}

GeoArrowGEOSErrorCode GeoArrowGEOSSchemaCalculatorFinish(
struct GeoArrowGEOSSchemaCalculator* calc, enum GeoArrowGEOSEncoding encoding,
struct ArrowSchema* out) {
enum GeoArrowCoordType coord_type;
switch (encoding) {
case GEOARROW_GEOS_ENCODING_WKT:
case GEOARROW_GEOS_ENCODING_WKB:
return GeoArrowGEOSMakeSchema(encoding, 0, out);
case GEOARROW_GEOS_ENCODING_GEOARROW:
coord_type = GEOARROW_COORD_TYPE_INTERLEAVED;
break;
case GEOARROW_GEOS_ENCODING_GEOARROW_INTERLEAVED:
coord_type = GEOARROW_COORD_TYPE_INTERLEAVED;
break;
default:
return EINVAL;
}

enum GeoArrowGeometryType geometry_type;
switch (calc->geometry_type) {
case GEOARROW_GEOMETRY_TYPE_POINT:
case GEOARROW_GEOMETRY_TYPE_LINESTRING:
case GEOARROW_GEOMETRY_TYPE_POLYGON:
case GEOARROW_GEOMETRY_TYPE_MULTIPOINT:
case GEOARROW_GEOMETRY_TYPE_MULTILINESTRING:
case GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON:
geometry_type = (enum GeoArrowGeometryType)calc->geometry_type;
break;
case -1:
// We don't have an "empty"/"null" type to return, but "POINT" is also
// not quite right.
default:
return GeoArrowGEOSMakeSchema(GEOARROW_GEOS_ENCODING_WKB, 0, out);
}

enum GeoArrowDimensions dimensions;
switch (calc->dimensions) {
case GEOARROW_DIMENSIONS_UNKNOWN:
dimensions = GEOARROW_DIMENSIONS_XY;
break;
case GEOARROW_DIMENSIONS_XY:
case GEOARROW_DIMENSIONS_XYZ:
case GEOARROW_DIMENSIONS_XYM:
case GEOARROW_DIMENSIONS_XYZM:
dimensions = (enum GeoArrowDimensions)calc->dimensions;
break;
default:
return GeoArrowGEOSMakeSchema(GEOARROW_GEOS_ENCODING_WKB, 0, out);
}

enum GeoArrowType type = GeoArrowMakeType(geometry_type, dimensions, coord_type);
GEOARROW_RETURN_NOT_OK(GeoArrowSchemaInitExtension(out, type));
return GEOARROW_OK;
}

// Release a calculator obtained from GeoArrowGEOSSchemaCalculatorCreate().
//
// calc: calculator to free. The pointer is handed straight to free(), so
//   passing NULL is a no-op.
void GeoArrowGEOSSchemaCalculatorDestroy(struct GeoArrowGEOSSchemaCalculator* calc) {
  free(calc);
}

GeoArrowGEOSErrorCode GeoArrowGEOSMakeSchema(int32_t encoding, int32_t wkb_type,
struct ArrowSchema* out) {
enum GeoArrowType type = GEOARROW_TYPE_UNINITIALIZED;
enum GeoArrowGeometryType geometry_type = GEOARROW_GEOMETRY_TYPE_GEOMETRY;
enum GeoArrowDimensions dimensions = GEOARROW_DIMENSIONS_UNKNOWN;
enum GeoArrowCoordType coord_type = GEOARROW_COORD_TYPE_UNKNOWN;

switch (encoding) {
case GEOARROW_GEOS_ENCODING_WKT:
type = GEOARROW_TYPE_WKT;
break;
case GEOARROW_GEOS_ENCODING_WKB:
type = GEOARROW_TYPE_WKB;
break;
case GEOARROW_GEOS_ENCODING_GEOARROW:
coord_type = GEOARROW_COORD_TYPE_SEPARATE;
break;
case GEOARROW_GEOS_ENCODING_GEOARROW_INTERLEAVED:
coord_type = GEOARROW_COORD_TYPE_INTERLEAVED;
break;
default:
return EINVAL;
}

if (type == GEOARROW_TYPE_UNINITIALIZED) {
geometry_type = wkb_type % 1000;
dimensions = wkb_type / 1000 + 1;
type = GeoArrowMakeType(geometry_type, dimensions, coord_type);
}

GEOARROW_RETURN_NOT_OK(GeoArrowSchemaInitExtension(out, type));
return GEOARROW_OK;
}
67 changes: 64 additions & 3 deletions src/geoarrow_geos/geoarrow_geos.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ const char* GeoArrowGEOSVersionGeoArrow(void);

// Opaque builder handle; only a forward declaration is exposed in this header.
struct GeoArrowGEOSArrayBuilder;

// Initialize `out` as the extension schema for `encoding`. For the native
// GeoArrow encodings the geometry type and dimensions are decoded from
// `wkb_type`; WKT and WKB ignore `wkb_type`. Returns EINVAL for an
// unrecognized encoding.
GeoArrowGEOSErrorCode GeoArrowGEOSMakeSchema(int32_t encoding, int32_t wkb_type,
struct ArrowSchema* out);

// Create a builder and store it in `*out`.
// NOTE(review): the implementation is not visible here; presumably `schema`
// selects the output type and `handle` is the GEOS context used by the
// builder. Confirm against geoarrow_geos.c.
GeoArrowGEOSErrorCode GeoArrowGEOSArrayBuilderCreate(
GEOSContextHandle_t handle, struct ArrowSchema* schema,
struct GeoArrowGEOSArrayBuilder** out);
Expand Down Expand Up @@ -108,6 +105,70 @@ GeoArrowGEOSErrorCode GeoArrowGEOSArrayReaderRead(struct GeoArrowGEOSArrayReader

// Release `reader`. The implementation ends by free()ing the reader itself, so
// the pointer must not be used afterwards.
void GeoArrowGEOSArrayReaderDestroy(struct GeoArrowGEOSArrayReader* reader);

// Opaque state used to infer one output schema from many geometry type codes.
struct GeoArrowGEOSSchemaCalculator;

// Allocate a calculator in its initial state and store it in `*out`.
// Returns GEOARROW_OK on success; on allocation failure returns ENOMEM and
// sets `*out` to NULL. Release with GeoArrowGEOSSchemaCalculatorDestroy().
GeoArrowGEOSErrorCode GeoArrowGEOSSchemaCalculatorCreate(
struct GeoArrowGEOSSchemaCalculator** out);

// Merge `n` type codes from `wkb_type` into the calculator. Codes equal to 0
// are skipped. May be called repeatedly before Finish.
void GeoArrowGEOSSchemaCalculatorIngest(struct GeoArrowGEOSSchemaCalculator* calc,
const int32_t* wkb_type, size_t n);

// Initialize `out` with the schema for `encoding` given everything ingested
// so far. WKT and WKB ignore the accumulated state. The native encodings fall
// back to a WKB schema when the merged geometry type is not one of the six
// point/linestring/polygon (multi) types. Returns EINVAL for an unrecognized
// encoding.
GeoArrowGEOSErrorCode GeoArrowGEOSSchemaCalculatorFinish(
struct GeoArrowGEOSSchemaCalculator* calc, enum GeoArrowGEOSEncoding encoding,
struct ArrowSchema* out);

// Free a calculator created by GeoArrowGEOSSchemaCalculatorCreate().
void GeoArrowGEOSSchemaCalculatorDestroy(struct GeoArrowGEOSSchemaCalculator* calc);

// Initialize `out` as the extension schema for `encoding`. For the native
// GeoArrow encodings the geometry type and dimensions are decoded from
// `wkb_type`; WKT and WKB ignore `wkb_type`. Returns EINVAL for an
// unrecognized encoding.
GeoArrowGEOSErrorCode GeoArrowGEOSMakeSchema(int32_t encoding, int32_t wkb_type,
struct ArrowSchema* out);

// Compute the integer type code for a GEOS geometry.
//
// handle: GEOS context passed to the reentrant GEOS calls.
// geom: geometry to inspect; may be NULL.
//
// Returns 0 when `geom` is NULL or has no coordinates. Otherwise returns a
// geometry code (1 point, 2 linestring or linear ring, 3 polygon, 4 multipoint,
// 5 multilinestring, 6 multipolygon, 7 geometry collection, 0 for any other
// GEOS type id) plus 2000 when the coordinate dimension is 3.
//
// NOTE(review): ISO WKB conventionally uses +1000 for Z and +2000 for M, and
// GeoArrowGEOSMakeSchema() decodes dimensions as `wkb_type / 1000 + 1`, while
// GeoArrowGEOSSchemaCalculatorIngest() uses `wkb_type / 1000` directly.
// Confirm which numbering is intended before changing this offset.
static inline int32_t GeoArrowGEOSWKBType(GEOSContextHandle_t handle,
                                          const GEOSGeometry* geom) {
  if (geom == NULL) {
    return 0;
  }

  if (GEOSGetNumCoordinates_r(handle, geom) == 0) {
    return 0;
  }

  // Not sure how GEOS handles M in newer versions
  const int32_t dimension_offset =
      (GEOSGeom_getCoordinateDimension_r(handle, geom) == 3) ? 2000 : 0;

  int32_t geometry_code;
  switch (GEOSGeomTypeId_r(handle, geom)) {
    case GEOS_POINT:
      geometry_code = 1;
      break;
    case GEOS_LINEARRING:
    case GEOS_LINESTRING:
      geometry_code = 2;
      break;
    case GEOS_POLYGON:
      geometry_code = 3;
      break;
    case GEOS_MULTIPOINT:
      geometry_code = 4;
      break;
    case GEOS_MULTILINESTRING:
      geometry_code = 5;
      break;
    case GEOS_MULTIPOLYGON:
      geometry_code = 6;
      break;
    case GEOS_GEOMETRYCOLLECTION:
      geometry_code = 7;
      break;
    default:
      // Unrecognized type id: contribute no geometry code.
      geometry_code = 0;
      break;
  }

  return dimension_offset + geometry_code;
}

#ifdef __cplusplus
}
#endif
Expand Down
Loading