Skip to content

Commit

Permalink
Bump to apache-arrow 15 and simplify worker utils (#22)
Browse files Browse the repository at this point in the history
* Bump to apache-arrow 15 and simplify worker utils

* update readme
  • Loading branch information
kylebarron authored Jan 22, 2024
1 parent 736e8c3 commit 020c0a8
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 216 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Refer to the [`algorithm` namespace](https://geoarrow.github.io/geoarrow-js/modu

## Web Worker utilities

Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Note that due to limitations in Arrow JS (as of v14) you **must** use `preparePostMessage` before a call to `structuredClone` or `postMessage`, to ensure it can correctly be rehydrated on the worker.
Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Use `preparePostMessage` to obtain references to all underlying `ArrayBuffer` objects, so they can be transferred instead of copied.

```ts
import * as arrow from "apache-arrow";
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
"src/"
],
"peerDependencies": {
"apache-arrow": ">=14"
"apache-arrow": ">=15"
},
"devDependencies": {
"@rollup/plugin-terser": "^0.4.3",
"@rollup/plugin-typescript": "^11.1.2",
"@types/node": "^20.9.3",
"@types/proj4": "^2",
"apache-arrow": "^14",
"apache-arrow": "^15",
"esbuild": "^0.19.8",
"gh-pages": "^6.1.0",
"prettier": "^3.1.0",
Expand Down
146 changes: 3 additions & 143 deletions src/worker/rehydrate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,11 @@ import { Data } from "apache-arrow/data";
import { Vector } from "apache-arrow/vector";
import { Field } from "apache-arrow/schema";
import type { Buffers } from "apache-arrow/data";
import {
LineString,
Point,
Polygon,
MultiLineString,
MultiPoint,
MultiPolygon,
} from "../type";
import { Polygon, isPolygon } from "../type";
import { PolygonData } from "../data";

// Typedefs that include only the information kept from a structuredClone
type PostMessageDataType = Pick<DataType, "children"> & {
__type: Type;
};
type PostMessageDataType = Pick<DataType, "children" | "typeId">;
type PostMessageField = Pick<Field, "name" | "nullable" | "metadata"> & {
type: PostMessageDataType;
};
Expand All @@ -62,16 +53,7 @@ type PostMessageVector<T extends DataType> = Pick<
> & { type: PostMessageDataType };

function rehydrateType<T extends Type>(type: PostMessageDataType): DataType<T> {
// Note: by default in Arrow JS, the `DataType` is a class with no identifying
// attribute. Since a `structuredClone` is unable to maintain class
// information, the result of `structuredClone(new arrow.Utf8())` is an empty
// object `{}`.
//
// To get around this, in `preparePostMessage`, we manually assign the
// `typeId` (usually a getter) onto `__type`. Then when rehydrating the type,
// we can match on the `__type`, checking `arrow.Type` values, and
// reconstitute a full `arrow.DataType` object.
switch (type.__type) {
switch (type.typeId) {
case Type.Null:
return new Null() as DataType<T>;
case Type.Int:
Expand Down Expand Up @@ -158,7 +140,6 @@ function rehydrateField(field: PostMessageField): Field {
export function rehydrateData<T extends DataType>(
data: PostMessageData<T>,
): Data<T> {
// @ts-expect-error
const children = data.children.map((childData) => rehydrateData(childData));
const dictionary = data.dictionary
? rehydrateVector(data.dictionary)
Expand Down Expand Up @@ -209,124 +190,3 @@ export function rehydratePolygonData(
// on the JS side.
return data;
}

// NOTE: these functions are copied from `type.ts` to work on __type

/**
 * Check that the given type is a Point data type.
 *
 * Matches on the plain `__type` tag instead of the `typeId` getter. Two
 * layouts are accepted:
 * - a FixedSizeList of size 2, 3 or 4 whose child type is Float;
 * - a Struct with 2, 3 or 4 children, each named one of "x", "y", "z", "m"
 *   and each of type Float.
 *
 * @param type the data type to inspect
 * @returns `true` when `type` matches one of the layouts above
 */
function isPoint(type: DataType): type is Point {
  // @ts-expect-error `__type` is not declared on DataType
  if (type.__type === Type.FixedSizeList) {
    // Check list size
    // @ts-expect-error `listSize` is not declared on the base DataType
    if (![2, 3, 4].includes(type.listSize)) {
      return false;
    }

    // Check child of FixedSizeList is floating type. Each child is a Field,
    // and the `__type` tag lives on the field's `type`, not on the field
    // itself, so it has to be read through `.type`.
    // @ts-expect-error `__type` is not declared on DataType
    if (type.children[0].type.__type !== Type.Float) {
      return false;
    }

    return true;
  }

  // @ts-expect-error `__type` is not declared on DataType
  if (type.__type === Type.Struct) {
    // Check number of children
    if (![2, 3, 4].includes(type.children.length)) {
      return false;
    }

    // Check that children have correct field names
    if (
      !type.children.every((field) => ["x", "y", "z", "m"].includes(field.name))
    ) {
      return false;
    }

    // Check that every child is a floating type (tag read from `field.type`,
    // for the same reason as in the FixedSizeList branch).
    // @ts-expect-error `__type` is not declared on DataType
    if (!type.children.every((field) => field.type.__type === Type.Float)) {
      return false;
    }

    return true;
  }

  return false;
}

/**
 * Check that the given type is a LineString data type: a List (matched on
 * the `__type` tag) whose child type is a Point.
 */
function isLineString(type: DataType): type is LineString {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Only look at the child once the outer type is known to be a List.
  return outerIsList && isPoint(type.children[0].type);
}

/**
 * Check that the given type is a Polygon data type: a List (matched on the
 * `__type` tag) whose child type is a LineString.
 */
function isPolygon(type: DataType): type is Polygon {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Only look at the child once the outer type is known to be a List.
  return outerIsList && isLineString(type.children[0].type);
}

/**
 * Check that the given type is a MultiPoint data type: a List (matched on
 * the `__type` tag) whose child type is a Point.
 */
function isMultiPoint(type: DataType): type is MultiPoint {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Only look at the child once the outer type is known to be a List.
  return outerIsList && isPoint(type.children[0].type);
}

/**
 * Check that the given type is a MultiLineString data type: a List (matched
 * on the `__type` tag) whose child type is a LineString.
 */
function isMultiLineString(type: DataType): type is MultiLineString {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Only look at the child once the outer type is known to be a List.
  return outerIsList && isLineString(type.children[0].type);
}

/**
 * Check that the given type is a MultiPolygon data type: a List (matched on
 * the `__type` tag) whose child type is a Polygon.
 */
function isMultiPolygon(type: DataType): type is MultiPolygon {
  // @ts-expect-error `__type` is not declared on DataType
  const outerIsList = type.__type === Type.List;

  // Only look at the child once the outer type is known to be a List.
  return outerIsList && isPolygon(type.children[0].type);
}
16 changes: 1 addition & 15 deletions src/worker/transferable.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { DataType } from "apache-arrow/type";
import { BufferType, Type } from "apache-arrow/enum";
import { BufferType } from "apache-arrow/enum";
import { Data } from "apache-arrow/data";
import { Vector } from "apache-arrow/vector";
import { hardClone } from "./hard-clone";
Expand Down Expand Up @@ -30,7 +30,6 @@ export function preparePostMessage<T extends DataType>(
transferArrayBuffers.push(...arrayBuffers);
}
const vector = new Vector(postMessageDatas);
assignTypeIdOnType(vector.type);
return [vector, transferArrayBuffers];
}

Expand Down Expand Up @@ -74,18 +73,5 @@ export function preparePostMessage<T extends DataType>(
transferArrayBuffers.push(input.buffers[BufferType.TYPE].buffer);
}

assignTypeIdOnType(input.type);

return [input, transferArrayBuffers];
}

/**
 * Copy `typeId` onto a plain `__type` property of the given type, then do the
 * same for the type of every child field, recursively.
 *
 * @param type the data type to tag; it is mutated in place
 */
function assignTypeIdOnType<T extends Type>(type: DataType<T>): void {
  // @ts-expect-error __type does not exist
  type.__type = type.typeId;

  // A missing or empty `children` list ends the recursion.
  for (const { type: childType } of type.children || []) {
    assignTypeIdOnType(childType);
  }
}
Loading

0 comments on commit 020c0a8

Please sign in to comment.