Skip to content

Commit

Permalink
react helpers for parquet and accessors
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Oct 4, 2023
1 parent a72c935 commit 20df93a
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 124 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"@geoarrow/deck.gl-layers": "0.1.0-beta.4",
"apache-arrow": "^13.0.0",
"maplibre-gl": "^3.3.1",
"parquet-wasm": "^0.5.0-alpha.1",
"parquet-wasm": "0.5.0-alpha.1",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-map-gl": "^7.1.5"
Expand Down
64 changes: 64 additions & 0 deletions src/accessor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import * as arrow from "apache-arrow";
import { parseParquet } from "./parquet";
import { useState, useEffect } from "react";

// /**
// * @template T
// *
// * @param {string} key
// * @returns {[T, (value: T) => void]}
// */
// export function useModelState(key) {
// let model = useModel();
// let [value, setValue] = React.useState(model.get(key));
// React.useEffect(() => {
// let callback = () => setValue(model.get(key));
// model.on(`change:${key}`, callback);
// return () => model.off(`change:${key}`, callback);
// }, [model, key]);
// return [
// value,
// (value) => {
// model.set(key, value);
// model.save_changes();
// },
// ];
// }

/**
 * React hook that turns a raw Parquet buffer into an Arrow table.
 *
 * Parsing happens inside an effect that re-runs only when `wasmReady` or
 * `dataRaw` changes. While wasm is not ready, or the buffer is missing or
 * empty, nothing is parsed and the previously stored table (initially `null`)
 * is left untouched.
 *
 * @param wasmReady - Whether the parquet-wasm module has been initialized.
 * @param dataRaw - Parquet bytes to parse.
 * @returns A one-element tuple holding the most recently parsed table, or
 *   `null` if nothing has been parsed yet.
 */
export function useTableBufferState(
  wasmReady: boolean,
  dataRaw: DataView
): [arrow.Table | null] {
  const [table, setTable] = useState<arrow.Table | null>(null);

  useEffect(() => {
    // Bail out until wasm is up and there are actual bytes to read.
    if (!wasmReady || !dataRaw || !(dataRaw.byteLength > 0)) {
      return;
    }
    setTable(parseParquet(dataRaw));
  }, [wasmReady, dataRaw]);

  return [table];
}

/**
 * React hook that resolves a raw accessor value into the value handed to a
 * layer.
 *
 * - A `DataView` is treated as a Parquet buffer: when wasm is ready and the
 *   buffer is non-empty it is parsed and the first column of the resulting
 *   table is stored; otherwise `null` is stored.
 * - Any other value is stored as-is.
 *
 * The stored value is recomputed whenever `wasmReady` or `accessorRaw`
 * changes.
 *
 * @param wasmReady - Whether the parquet-wasm module has been initialized.
 * @param accessorRaw - Either a Parquet buffer (`DataView`) or a plain value.
 * @returns A one-element array with the resolved accessor (initially `null`).
 */
export function useAccessorState(wasmReady: boolean, accessorRaw: any): any {
  const [resolved, setResolved] = useState(null);

  useEffect(() => {
    // Non-buffer values pass straight through.
    let next: any = accessorRaw;

    if (accessorRaw instanceof DataView) {
      const canParse = wasmReady && accessorRaw.byteLength > 0;
      next = canParse ? parseParquet(accessorRaw).getChildAt(0) : null;
    }

    setResolved(next);
  }, [wasmReady, accessorRaw]);

  return [resolved];
}
89 changes: 89 additions & 0 deletions src/parquet.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import { useEffect, useState } from "react";
import _initParquetWasm, { readParquet } from "parquet-wasm/esm/arrow2";
import * as arrow from "apache-arrow";

// NOTE: this version must be synced exactly with the parquet-wasm version in
// use.
const PARQUET_WASM_VERSION = "0.5.0-alpha.1";
// URL of the arrow2 wasm binary for that exact version, served from jsDelivr.
const PARQUET_WASM_CDN_URL = `https://cdn.jsdelivr.net/npm/parquet-wasm@${PARQUET_WASM_VERSION}/esm/arrow2_bg.wasm`;
// Module-wide flag: set to true by initParquetWasm() once initialization has
// completed, and checked by parseParquet() before reading any data.
let WASM_READY: boolean = false;

// Initialization currently in flight, shared by all concurrent callers so the
// wasm binary is fetched and instantiated only once.
let wasmInitPromise: Promise<void> | null = null;

/**
 * Initialize the parquet-wasm module from `PARQUET_WASM_CDN_URL`.
 *
 * Safe to call any number of times: once initialization has succeeded the
 * call returns immediately, and calls made while an initialization is still
 * running wait on that same attempt instead of starting another one.
 *
 * @returns A promise that resolves once the wasm module is ready.
 * @throws Re-throws whatever the underlying wasm initializer rejects with.
 *   After a failure the cached attempt is discarded, so a later call retries.
 */
export async function initParquetWasm(): Promise<void> {
  if (WASM_READY) {
    return;
  }

  if (wasmInitPromise === null) {
    wasmInitPromise = (async () => {
      await _initParquetWasm(PARQUET_WASM_CDN_URL);
      WASM_READY = true;
    })();
  }

  const pending = wasmInitPromise;
  try {
    await pending;
  } catch (err: unknown) {
    // Only clear the cache if it still refers to the attempt that failed, so
    // a retry started by another caller is not thrown away.
    if (wasmInitPromise === pending) {
      wasmInitPromise = null;
    }
    throw err;
  }
}

/**
 * Parse a Parquet buffer to an Arrow JS table
 *
 * Only the bytes covered by `dataView` (its `byteOffset` / `byteLength`
 * window) are read, so a view over part of a larger `ArrayBuffer` is handled
 * correctly. The duration of the parse is reported through
 * `console.time("readParquet")`, including when parsing fails.
 *
 * @param dataView - View over the bytes of a Parquet file.
 * @returns The decoded Arrow table.
 * @throws Error("wasm not ready") if the wasm module has not been initialized
 *   yet; errors raised while reading or decoding propagate to the caller.
 */
export function parseParquet(dataView: DataView): arrow.Table {
  if (!WASM_READY) {
    throw new Error("wasm not ready");
  }

  console.time("readParquet");
  try {
    // Respect the view's window instead of wrapping the whole backing buffer.
    const parquetBytes = new Uint8Array(
      dataView.buffer,
      dataView.byteOffset,
      dataView.byteLength
    );

    // TODO: use arrow-js-ffi for more memory-efficient wasm --> js transfer
    const arrowIPCBuffer = readParquet(parquetBytes).intoIPC();
    return arrow.tableFromIPC(arrowIPCBuffer);
  } finally {
    // Always close the timer, even when parsing throws.
    console.timeEnd("readParquet");
  }
}

/**
 * React hook that kicks off parquet-wasm initialization on mount and reports
 * when it has finished.
 *
 * If initialization fails, the error is logged and the flag stays `false`
 * rather than surfacing as an unhandled promise rejection. If the component
 * unmounts before initialization completes, the state update is skipped.
 *
 * @returns A one-element tuple whose value becomes `true` once the wasm module
 *   is ready.
 */
export function useParquetWasm(): [boolean] {
  const [wasmReady, setWasmReady] = useState<boolean>(false);

  // Init parquet wasm
  useEffect(() => {
    let cancelled = false;

    const run = async (): Promise<void> => {
      try {
        await initParquetWasm();
        if (!cancelled) {
          setWasmReady(true);
        }
      } catch (err: unknown) {
        console.error("Failed to initialize parquet-wasm", err);
      }
    };

    void run();

    return () => {
      cancelled = true;
    };
  }, []);

  return [wasmReady];
}

// // NOTE: this was an attempt to only parse Parquet for the initial data and
// // whenever the data buffer changed. But I had issues where the wasm wasn't
// // ready yet when the original data needed to be instantiated
// //
// // NOTE2: I worked around this by adding a useEffect in the main App().. so this
// // function can probably be deleted
// function useModelParquetState(
// key: string,
// ...deps
// ): [arrow.Table | undefined, (value: DataView) => void] {
// let model = useModel();

// console.log("WASM_READY", WASM_READY);
// let [table, setTable] = useState<arrow.Table | undefined>(
// WASM_READY ? parseParquet(model.get(key)) : undefined
// );
// console.log(deps);
// useEffect(() => {
// let parquetCallback = () => {
// console.log("inside parquetCallback");
// setTable(WASM_READY ? parseParquet(model.get(key)) : undefined);
// };
// model.on(`change:${key}`, parquetCallback);
// return () => model.off(`change:${key}`, parquetCallback);
// }, [model, key, deps]);

// console.log("useModelParquetState table", table);
// return [
// table,
// (value) => {
// model.set(key, value);
// model.save_changes();
// },
// ];
// }
140 changes: 19 additions & 121 deletions src/point.tsx
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import * as React from "react";
import { useState, useEffect } from "react";
import { createRender, useModel, useModelState } from "@anywidget/react";
import { createRender, useModelState } from "@anywidget/react";
import Map from "react-map-gl/maplibre";
import DeckGL from "@deck.gl/react/typed";
import * as arrow from "apache-arrow";
import { GeoArrowScatterplotLayer } from "@geoarrow/deck.gl-layers";
import initParquetWasm, { readParquet } from "parquet-wasm/esm/arrow2";
import { useParquetWasm } from "./parquet";
import { useAccessorState, useTableBufferState } from "./accessor";

const INITIAL_VIEW_STATE = {
latitude: 10,
Expand All @@ -18,97 +17,10 @@ const INITIAL_VIEW_STATE = {
const MAP_STYLE =
"https://basemaps.cartocdn.com/gl/positron-nolabels-gl-style/style.json";

const PARQUET_WASM_VERSION = "0.5.0-alpha.1";
const PARQUET_WASM_CDN_URL = `https://cdn.jsdelivr.net/npm/parquet-wasm@${PARQUET_WASM_VERSION}/esm/arrow2_bg.wasm`;

let WASM_READY: boolean = false;

/**
 * Parse a Parquet buffer to an Arrow JS table
 *
 * Only the bytes covered by `dataView` (its `byteOffset` / `byteLength`
 * window) are read, so a view over part of a larger `ArrayBuffer` is handled
 * correctly. The duration of the parse is reported through
 * `console.time("readParquet")`, including when parsing fails.
 *
 * @param dataView - View over the bytes of a Parquet file.
 * @returns The decoded Arrow table.
 * @throws Error("wasm not ready") if the wasm module has not been initialized
 *   yet; errors raised while reading or decoding propagate to the caller.
 */
function parseParquet(dataView: DataView): arrow.Table {
  if (!WASM_READY) {
    throw new Error("wasm not ready");
  }

  console.time("readParquet");
  try {
    // Respect the view's window instead of wrapping the whole backing buffer.
    const parquetBytes = new Uint8Array(
      dataView.buffer,
      dataView.byteOffset,
      dataView.byteLength
    );

    // TODO: use arrow-js-ffi for more memory-efficient wasm --> js transfer
    const arrowIPCBuffer = readParquet(parquetBytes).intoIPC();
    return arrow.tableFromIPC(arrowIPCBuffer);
  } finally {
    // Always close the timer, even when parsing throws.
    console.timeEnd("readParquet");
  }
}

// NOTE: this was an attempt to only parse Parquet for the initial data and
// whenever the data buffer changed. But I had issues where the wasm wasn't
// ready yet when the original data needed to be instantiated
//
// NOTE2: I worked around this by adding a useEffect in the main App().. so this
// function can probably be deleted
//
// Hook that reads the Parquet buffer stored under `key` on the widget model,
// parses it into an Arrow table, and recomputes the table on every
// `change:${key}` event. Returns the table (undefined if WASM_READY was false
// when it was last computed) together with a setter that writes a new buffer
// to the model and calls `save_changes()`.
//
// NOTE(review): `deps` is a rest parameter, i.e. a new array on every call.
// Because that array itself is listed in the effect's dependency array, the
// effect presumably unsubscribes and resubscribes on every render -- confirm
// before reusing this hook.
function useModelParquetState(
key: string,
...deps
): [arrow.Table | undefined, (value: DataView) => void] {
let model = useModel();

// Debug logging left in from development.
console.log("WASM_READY", WASM_READY);
// The initial value is parsed only if wasm was already initialized when the
// state is first created; otherwise the table starts out undefined.
let [table, setTable] = useState<arrow.Table | undefined>(
WASM_READY ? parseParquet(model.get(key)) : undefined
);
console.log(deps);
useEffect(() => {
// Re-read and re-parse the buffer whenever the model reports a change.
let parquetCallback = () => {
console.log("inside parquetCallback");
setTable(WASM_READY ? parseParquet(model.get(key)) : undefined);
};
model.on(`change:${key}`, parquetCallback);
// Cleanup: remove the listener registered above.
return () => model.off(`change:${key}`, parquetCallback);
}, [model, key, deps]);

console.log("useModelParquetState table", table);
return [
table,
// Setter: store the new raw buffer on the model and save the change.
(value) => {
model.set(key, value);
model.save_changes();
},
];
}

function App() {
const [wasmReady, setWasmReady] = React.useState<boolean>(false);

// Init parquet wasm
React.useEffect(() => {
const callback = async () => {
await initParquetWasm(PARQUET_WASM_CDN_URL);
setWasmReady(true);
WASM_READY = true;
};

callback();
}, []);

let [dataView] = useModelState<DataView>("table_buffer");

const [dataTable, setDataTable] = useState<arrow.Table | null>(null);

// Only parse the table's parquet buffer when the buffer itself or wasmReady
// has changed
useEffect(() => {
const callback = () => {
if (wasmReady && dataView && dataView.byteLength > 0) {
const arrowTable = parseParquet(dataView);
setDataTable(arrowTable);
}
};

callback();
}, [wasmReady, dataView]);
const [wasmReady] = useParquetWasm();

let [dataRaw] = useModelState<DataView>("table_buffer");
let [radiusUnits] = useModelState("radius_units");
let [radiusScale] = useModelState("radius_scale");
let [radiusMinPixels] = useModelState("radius_min_pixels");
Expand All @@ -121,10 +33,16 @@ function App() {
let [filled] = useModelState("filled");
let [billboard] = useModelState("billboard");
let [antialiasing] = useModelState("antialiasing");
let [getRadius] = useModelState("get_radius");
let [getFillColor] = useModelState("get_fill_color");
let [getLineColor] = useModelState("get_line_color");
let [getLineWidth] = useModelState("get_line_width");
let [getRadiusRaw] = useModelState("get_radius");
let [getFillColorRaw] = useModelState("get_fill_color");
let [getLineColorRaw] = useModelState("get_line_color");
let [getLineWidthRaw] = useModelState("get_line_width");

const [dataTable] = useTableBufferState(wasmReady, dataRaw);
const [getRadius] = useAccessorState(wasmReady, getRadiusRaw);
const [getFillColor] = useAccessorState(wasmReady, getFillColorRaw);
const [getLineColor] = useAccessorState(wasmReady, getLineColorRaw);
const [getLineWidth] = useAccessorState(wasmReady, getLineWidthRaw);

const layers = [];
if (wasmReady && dataTable) {
Expand All @@ -144,30 +62,10 @@ function App() {
...(filled && { filled }),
...(billboard && { billboard }),
...(antialiasing && { antialiasing }),
...(getRadius && {
getRadius:
getRadius instanceof DataView
? parseParquet(getRadius).getChildAt(0)
: getRadius,
}),
...(getFillColor && {
getFillColor:
getFillColor instanceof DataView
? parseParquet(getFillColor).getChildAt(0)
: getFillColor,
}),
...(getLineColor && {
getLineColor:
getLineColor instanceof DataView
? parseParquet(getLineColor).getChildAt(0)
: getLineColor,
}),
...(getLineWidth && {
getLineWidth:
getLineWidth instanceof DataView
? parseParquet(getLineWidth).getChildAt(0)
: getLineWidth,
}),
...(getRadius && { getRadius }),
...(getFillColor && { getFillColor }),
...(getLineColor && { getLineColor }),
...(getLineWidth && { getLineWidth }),
});
layers.push(layer);
}
Expand Down
4 changes: 2 additions & 2 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,7 @@ __metadata:
languageName: node
linkType: hard

"parquet-wasm@npm:^0.5.0-alpha.1":
"parquet-wasm@npm:0.5.0-alpha.1":
version: 0.5.0-alpha.1
resolution: "parquet-wasm@npm:0.5.0-alpha.1"
checksum: a0852c0e156d3862c8a93954aaf6a4aaf4ce670501f6871bae5e54ffe1e62286bf8dc70c7a1383c4ba5df210d6c2a3ccdc378e934745ca99497ed6fdde711e26
Expand Down Expand Up @@ -1313,7 +1313,7 @@ __metadata:
esbuild: ^0.19.2
esbuild-plugin-wasm: ^1.1.0
maplibre-gl: ^3.3.1
parquet-wasm: ^0.5.0-alpha.1
parquet-wasm: 0.5.0-alpha.1
react: ^18.2.0
react-dom: ^18.2.0
react-map-gl: ^7.1.5
Expand Down

0 comments on commit 20df93a

Please sign in to comment.