From 020c0a8baeae0e5b08735e8f9e3c1771663f250d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 22 Jan 2024 10:27:54 -0500 Subject: [PATCH] Bump to apache-arrow 15 and simplify worker utils (#22) * Bump to apache-arrow 15 and simplify worker utils * update readme --- README.md | 2 +- package.json | 4 +- src/worker/rehydrate.ts | 146 +------------------------------------ src/worker/transferable.ts | 16 +--- yarn.lock | 97 +++++++++++------------- 5 files changed, 49 insertions(+), 216 deletions(-) diff --git a/README.md b/README.md index e668993..dacbc80 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Refer to the [`algorithm` namespace](https://geoarrow.github.io/geoarrow-js/modu ## Web Worker utilities -Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Note that due to limitations in Arrow JS (as of v14) you **must** use `preparePostMessage` before a call to `structuredClone` or `postMessage`, to ensure it can correctly be rehydrated on the worker. +Refer to the [`worker` namespace](https://geoarrow.github.io/geoarrow-js/modules/worker.html). Use `preparePostMessage` to obtain references to all underlying `ArrayBuffer` objects, so they can be transferred instead of copied.
```ts import * as arrow from "apache-arrow"; diff --git a/package.json b/package.json index 6e8442e..eb9d885 100644 --- a/package.json +++ b/package.json @@ -44,14 +44,14 @@ "src/" ], "peerDependencies": { - "apache-arrow": ">=14" + "apache-arrow": ">=15" }, "devDependencies": { "@rollup/plugin-terser": "^0.4.3", "@rollup/plugin-typescript": "^11.1.2", "@types/node": "^20.9.3", "@types/proj4": "^2", - "apache-arrow": "^14", + "apache-arrow": "^15", "esbuild": "^0.19.8", "gh-pages": "^6.1.0", "prettier": "^3.1.0", diff --git a/src/worker/rehydrate.ts b/src/worker/rehydrate.ts index 4bc797d..5bf25b0 100644 --- a/src/worker/rehydrate.ts +++ b/src/worker/rehydrate.ts @@ -24,20 +24,11 @@ import { Data } from "apache-arrow/data"; import { Vector } from "apache-arrow/vector"; import { Field } from "apache-arrow/schema"; import type { Buffers } from "apache-arrow/data"; -import { - LineString, - Point, - Polygon, - MultiLineString, - MultiPoint, - MultiPolygon, -} from "../type"; +import { Polygon, isPolygon } from "../type"; import { PolygonData } from "../data"; // Typedefs that include only the information kept from a structuredClone -type PostMessageDataType = Pick & { - __type: Type; -}; +type PostMessageDataType = Pick; type PostMessageField = Pick & { type: PostMessageDataType; }; @@ -62,16 +53,7 @@ type PostMessageVector = Pick< > & { type: PostMessageDataType }; function rehydrateType(type: PostMessageDataType): DataType { - // Note: by default in Arrow JS, the `DataType` is a class with no identifying - // attribute. Since a `structuredClone` is unable to maintain class - // information, the result of `structuredClone(new arrow.Utf8())` is an empty - // object `{}`. - // - // To get around this, in `preparePostMessage`, we manually assign the - // `typeId` (usually a getter) onto `__type`. Then when rehydrating the type, - // we can match on the `__type`, checking `arrow.Type` values, and - // reconstitute a full `arrow.DataType` object. 
- switch (type.__type) { + switch (type.typeId) { case Type.Null: return new Null() as DataType; case Type.Int: @@ -158,7 +140,6 @@ function rehydrateField(field: PostMessageField): Field { export function rehydrateData( data: PostMessageData, ): Data { - // @ts-expect-error const children = data.children.map((childData) => rehydrateData(childData)); const dictionary = data.dictionary ? rehydrateVector(data.dictionary) @@ -209,124 +190,3 @@ export function rehydratePolygonData( // on the JS side. return data; } - -// NOTE: these functions are copied from `type.ts` to work on __type - -/** Check that the given type is a Point data type */ -function isPoint(type: DataType): type is Point { - // @ts-expect-error - if (type.__type === Type.FixedSizeList) { - // Check list size - // @ts-expect-error - if (![2, 3, 4].includes(type.listSize)) { - return false; - } - - // Check child of FixedSizeList is floating type - // @ts-expect-error - if (type.children[0].__type !== Type.Float) { - return false; - } - - return true; - } - - // @ts-expect-error - if (type.__type === Type.Struct) { - // Check number of children - if (![2, 3, 4].includes(type.children.length)) { - return false; - } - - // Check that children have correct field names - if ( - !type.children.every((field) => ["x", "y", "z", "m"].includes(field.name)) - ) { - return false; - } - - // @ts-expect-error - if (!type.children.every((field) => field.__type === Type.Float)) { - return false; - } - - return true; - } - - return false; -} - -function isLineString(type: DataType): type is LineString { - // Check the outer type is a List - // @ts-expect-error - if (type.__type !== Type.List) { - return false; - } - - // Check the child is a point type - if (!isPoint(type.children[0].type)) { - return false; - } - - return true; -} - -function isPolygon(type: DataType): type is Polygon { - // Check the outer vector is a List - // @ts-expect-error - if (type.__type !== Type.List) { - return false; - } - - // Check the 
child is a linestring vector - if (!isLineString(type.children[0].type)) { - return false; - } - - return true; -} - -function isMultiPoint(type: DataType): type is MultiPoint { - // Check the outer vector is a List - // @ts-expect-error - if (type.__type !== Type.List) { - return false; - } - - // Check the child is a point vector - if (!isPoint(type.children[0].type)) { - return false; - } - - return true; -} - -function isMultiLineString(type: DataType): type is MultiLineString { - // Check the outer vector is a List - // @ts-expect-error - if (type.__type !== Type.List) { - return false; - } - - // Check the child is a linestring vector - if (!isLineString(type.children[0].type)) { - return false; - } - - return true; -} - -function isMultiPolygon(type: DataType): type is MultiPolygon { - // Check the outer vector is a List - // @ts-expect-error - if (type.__type !== Type.List) { - return false; - } - - // Check the child is a polygon vector - if (!isPolygon(type.children[0].type)) { - return false; - } - - return true; -} diff --git a/src/worker/transferable.ts b/src/worker/transferable.ts index d9d07d5..ff51eaf 100644 --- a/src/worker/transferable.ts +++ b/src/worker/transferable.ts @@ -1,5 +1,5 @@ import { DataType } from "apache-arrow/type"; -import { BufferType, Type } from "apache-arrow/enum"; +import { BufferType } from "apache-arrow/enum"; import { Data } from "apache-arrow/data"; import { Vector } from "apache-arrow/vector"; import { hardClone } from "./hard-clone"; @@ -30,7 +30,6 @@ export function preparePostMessage( transferArrayBuffers.push(...arrayBuffers); } const vector = new Vector(postMessageDatas); - assignTypeIdOnType(vector.type); return [vector, transferArrayBuffers]; } @@ -74,18 +73,5 @@ export function preparePostMessage( transferArrayBuffers.push(input.buffers[BufferType.TYPE].buffer); } - assignTypeIdOnType(input.type); - return [input, transferArrayBuffers]; } - -function assignTypeIdOnType(type: DataType): void { - // 
@ts-expect-error __type does not exist - type.__type = type.typeId; - - if (type.children && type.children.length > 0) { - for (const child of type.children) { - assignTypeIdOnType(child.type); - } - } -} diff --git a/yarn.lock b/yarn.lock index fbf5d93..7f61d66 100644 --- a/yarn.lock +++ b/yarn.lock @@ -378,7 +378,7 @@ __metadata: "@rollup/plugin-typescript": "npm:^11.1.2" "@types/node": "npm:^20.9.3" "@types/proj4": "npm:^2" - apache-arrow: "npm:^14" + apache-arrow: "npm:^15" esbuild: "npm:^0.19.8" gh-pages: "npm:^6.1.0" prettier: "npm:^3.1.0" @@ -392,7 +392,7 @@ __metadata: typescript: "npm:^5.2.2" vitest: "npm:^0.34.6" peerDependencies: - apache-arrow: ">=14" + apache-arrow: ">=15" languageName: unknown linkType: soft @@ -678,6 +678,15 @@ __metadata: languageName: node linkType: hard +"@swc/helpers@npm:^0.5.2": + version: 0.5.3 + resolution: "@swc/helpers@npm:0.5.3" + dependencies: + tslib: "npm:^2.4.0" + checksum: 5ed4329cd36106e4c3c9c9fa710fae5b80521accce697d81030c42798c4653237f719269c24c26adf42579e15e1f720f31cd63983dea30debd298582a6cbd20a + languageName: node + linkType: hard + "@tsconfig/node10@npm:^1.0.7": version: 1.0.9 resolution: "@tsconfig/node10@npm:1.0.9" @@ -722,17 +731,17 @@ __metadata: languageName: node linkType: hard -"@types/command-line-args@npm:5.2.0": - version: 5.2.0 - resolution: "@types/command-line-args@npm:5.2.0" - checksum: 423121d2d083765f5b78d090115f3be82d53a39cec9de63719cbd07021e6330fab19b75e2290af1f7dda84efd7964dc498eb10b2b465991de27045db95aa1eef +"@types/command-line-args@npm:^5.2.1": + version: 5.2.3 + resolution: "@types/command-line-args@npm:5.2.3" + checksum: 3d90db5b4bbaabd049654a0d12fa378989ab0d76a0f98d4c606761b5a08ce76458df0f9bb175219e187b4cd57e285e6f836d23e86b2c3d997820854cc3ed9121 languageName: node linkType: hard -"@types/command-line-usage@npm:5.0.2": - version: 5.0.2 - resolution: "@types/command-line-usage@npm:5.0.2" - checksum: 
9c0eabf5e86a405d118dcfb5f4bceae43080efe603a0f240664716a05283dcb389e94e999188d12b10a0aa4452a920445131f1011e7484403f146607cd2577f0 +"@types/command-line-usage@npm:^5.0.2": + version: 5.0.4 + resolution: "@types/command-line-usage@npm:5.0.4" + checksum: 7173c356ca8c9507feeeda8e660c52498929556e90be0cf2d09d35270c597481121cd0f006a74167c5577feebfbc75b648c0f8f01b8f06ce30bde9fe30d5ba40 languageName: node linkType: hard @@ -752,17 +761,12 @@ __metadata: languageName: node linkType: hard -"@types/node@npm:20.3.0": - version: 20.3.0 - resolution: "@types/node@npm:20.3.0" - checksum: c1fdc2d64313f53fbd0d2d4172e61b2e088c4474c2d33f448cbda54948bc52f56b0fb78d39f958d4add630cd91d03bee292a34615e94a05251b7a4cb335899de - languageName: node - linkType: hard - -"@types/pad-left@npm:2.1.1": - version: 2.1.1 - resolution: "@types/pad-left@npm:2.1.1" - checksum: db0191e39af416e5eca02f22424306a13b1bf24abb00873bbbb8c28de5085764ff1243c32548580260952588fbef6d9a5a34106276013420bcc79f538f4e8cc2 +"@types/node@npm:^20.6.0": + version: 20.11.5 + resolution: "@types/node@npm:20.11.5" + dependencies: + undici-types: "npm:~5.26.4" + checksum: 9f31c471047d7b3e240ce7b77ff29b0d15e83be7e3feafb3d0b0d0931122b438b1eefa302a5a2e1e9849914ff3fd76aafbd8ccb372efb1331ba048da63bce6f8 languageName: node linkType: hard @@ -921,23 +925,22 @@ __metadata: languageName: node linkType: hard -"apache-arrow@npm:^14": - version: 14.0.1 - resolution: "apache-arrow@npm:14.0.1" +"apache-arrow@npm:^15": + version: 15.0.0 + resolution: "apache-arrow@npm:15.0.0" dependencies: - "@types/command-line-args": "npm:5.2.0" - "@types/command-line-usage": "npm:5.0.2" - "@types/node": "npm:20.3.0" - "@types/pad-left": "npm:2.1.1" - command-line-args: "npm:5.2.1" - command-line-usage: "npm:7.0.1" - flatbuffers: "npm:23.5.26" + "@swc/helpers": "npm:^0.5.2" + "@types/command-line-args": "npm:^5.2.1" + "@types/command-line-usage": "npm:^5.0.2" + "@types/node": "npm:^20.6.0" + command-line-args: "npm:^5.2.1" + command-line-usage: "npm:^7.0.1" + 
flatbuffers: "npm:^23.5.26" json-bignum: "npm:^0.0.3" - pad-left: "npm:^2.1.0" - tslib: "npm:^2.5.3" + tslib: "npm:^2.6.2" bin: - arrow2csv: bin/arrow2csv.js - checksum: 09c31f00354de413b3b6961a6cb2848fe744407477ed3fc1461c1e5fbb55183662a711d4755266a3599b2ca3531f2801b94be93ebde5f8c9cbf084f6bcf7de4e + arrow2csv: bin/arrow2csv.cjs + checksum: aff749aec1e0eddfce30e7da6b7f417a6d152b6c34b551a687828a9ee742f52294fc76e7b276e2c3e2374de9bd850f5f1b72c108f6f496e7b25b059389f22585 languageName: node linkType: hard @@ -1159,7 +1162,7 @@ __metadata: languageName: node linkType: hard -"command-line-args@npm:5.2.1, command-line-args@npm:^5.2.1": +"command-line-args@npm:^5.2.1": version: 5.2.1 resolution: "command-line-args@npm:5.2.1" dependencies: @@ -1171,7 +1174,7 @@ __metadata: languageName: node linkType: hard -"command-line-usage@npm:7.0.1, command-line-usage@npm:^7.0.0": +"command-line-usage@npm:^7.0.0, command-line-usage@npm:^7.0.1": version: 7.0.1 resolution: "command-line-usage@npm:7.0.1" dependencies: @@ -1545,7 +1548,7 @@ __metadata: languageName: node linkType: hard -"flatbuffers@npm:23.5.26": +"flatbuffers@npm:^23.5.26": version: 23.5.26 resolution: "flatbuffers@npm:23.5.26" checksum: f4add4f11414d3bef3a6e10525709c1766cbe3084c59e1cd98d871bde89eaf1c076ccc55913efc6125bdd6558d254a0d28adb73b544c79a69f961ba89bd1833f @@ -2251,15 +2254,6 @@ __metadata: languageName: node linkType: hard -"pad-left@npm:^2.1.0": - version: 2.1.0 - resolution: "pad-left@npm:2.1.0" - dependencies: - repeat-string: "npm:^1.5.4" - checksum: 571cb662aaec902a462b8809d54e38aa09f07bbf85359af50ca5c96b49546103473ba1d056fc5b5f9611b7472e40fca901486f209d1498f214e74abefabdd016 - languageName: node - linkType: hard - "path-exists@npm:^4.0.0": version: 4.0.0 resolution: "path-exists@npm:4.0.0" @@ -2450,13 +2444,6 @@ __metadata: languageName: node linkType: hard -"repeat-string@npm:^1.5.4": - version: 1.6.1 - resolution: "repeat-string@npm:1.6.1" - checksum: 
1b809fc6db97decdc68f5b12c4d1a671c8e3f65ec4a40c238bc5200e44e85bcc52a54f78268ab9c29fcf5fe4f1343e805420056d1f30fa9a9ee4c2d93e3cc6c0 - languageName: node - linkType: hard - "resolve@npm:^1.22.1": version: 1.22.8 resolution: "resolve@npm:1.22.8" @@ -2962,7 +2949,7 @@ __metadata: languageName: node linkType: hard -"tslib@npm:^2.5.3": +"tslib@npm:^2.4.0, tslib@npm:^2.6.2": version: 2.6.2 resolution: "tslib@npm:2.6.2" checksum: bd26c22d36736513980091a1e356378e8b662ded04204453d353a7f34a4c21ed0afc59b5f90719d4ba756e581a162ecbf93118dc9c6be5acf70aa309188166ca