Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JS Map support for encoding & decoding #5

Merged
merged 4 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 38 additions & 13 deletions src/Decoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
*/
useRawBinaryStrings: boolean;

/**
* If true, the decoder will use the Map object to store map values. If false, it will use plain
* objects. Defaults to false.
*
* Besides the type of container, the main difference is that Map objects support a wider range
* of key types. Plain objects only support string keys, while Map objects support strings,
jannotti marked this conversation as resolved.
Show resolved Hide resolved
* numbers, bigints, and Uint8Arrays.
*/
useMap: boolean;

/**
* Maximum string length.
*
Expand Down Expand Up @@ -82,18 +92,22 @@ const STATE_ARRAY = "array";
const STATE_MAP_KEY = "map_key";
const STATE_MAP_VALUE = "map_value";

type MapKeyType = string | number;
type MapKeyType = string | number | bigint | Uint8Array;

const isValidMapKeyType = (key: unknown): key is MapKeyType => {
return typeof key === "string" || typeof key === "number";
};
function isValidMapKeyType(key: unknown, useMap: boolean): key is MapKeyType {
if (useMap) {
return typeof key === "string" || typeof key === "number" || typeof key === "bigint" || key instanceof Uint8Array;
jannotti marked this conversation as resolved.
Show resolved Hide resolved
}
// Plain objects support a more limited set of key types
return typeof key === "string";

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a breaking change, should we keep the existing logic looking for number too?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's pretty strange to me that the upstream library was getting away with just coercing number keys to strings on objects, so I wanted to do away with that behavior now that there's a better alternative.

Although, since it is a breaking change, I think I'd be willing to add another decoding option that could control that behavior if it's really what you wanted, since this has been an established practice in the upstream library for a while.

}

type StackMapState = {
type: typeof STATE_MAP_KEY | typeof STATE_MAP_VALUE;
size: number;
key: MapKeyType | null;
readCount: number;
map: Record<string, unknown>;
map: Record<string, unknown> | Map<MapKeyType, unknown>;
};

type StackArrayState = {
Expand All @@ -107,6 +121,8 @@ class StackPool {
private readonly stack: Array<StackState> = [];
private stackHeadPosition = -1;

constructor(private readonly useMap: boolean) {}

public get length(): number {
return this.stackHeadPosition + 1;
}
Expand All @@ -130,7 +146,7 @@ class StackPool {
state.type = STATE_MAP_KEY;
state.readCount = 0;
state.size = size;
state.map = {};
state.map = this.useMap ? new Map() : {};
}

private getUninitializedStateFromPool() {
Expand Down Expand Up @@ -214,6 +230,7 @@ export class Decoder<ContextType = undefined> {
private readonly context: ContextType;
private readonly intMode: IntMode;
private readonly useRawBinaryStrings: boolean;
private readonly useMap: boolean;
private readonly maxStrLength: number;
private readonly maxBinLength: number;
private readonly maxArrayLength: number;
Expand All @@ -227,20 +244,23 @@ export class Decoder<ContextType = undefined> {
private view = EMPTY_VIEW;
private bytes = EMPTY_BYTES;
private headByte = HEAD_BYTE_REQUIRED;
private readonly stack = new StackPool();
private readonly stack: StackPool;

public constructor(options?: DecoderOptions<ContextType>) {
this.extensionCodec = options?.extensionCodec ?? (ExtensionCodec.defaultCodec as ExtensionCodecType<ContextType>);
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined

this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false;
this.useMap = options?.useMap ?? false;
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
this.maxMapLength = options?.maxMapLength ?? UINT32_MAX;
this.maxExtLength = options?.maxExtLength ?? UINT32_MAX;
this.keyDecoder = options?.keyDecoder !== undefined ? options.keyDecoder : sharedCachedKeyDecoder;

this.stack = new StackPool(this.useMap);
}

private reinitializeState() {
Expand Down Expand Up @@ -404,7 +424,7 @@ export class Decoder<ContextType = undefined> {
this.complete();
continue DECODE;
} else {
object = {};
object = this.useMap ? new Map() : {};
}
} else if (headByte < 0xa0) {
// fixarray (1001 xxxx) 0x90 - 0x9f
Expand Down Expand Up @@ -571,10 +591,11 @@ export class Decoder<ContextType = undefined> {
continue DECODE;
}
} else if (state.type === STATE_MAP_KEY) {
if (!isValidMapKeyType(object)) {
throw new DecodeError("The type of key must be string or number but " + typeof object);
if (!isValidMapKeyType(object, this.useMap)) {
const acceptableTypes = this.useMap ? "string, number, bigint, or Uint8Array" : "string";
throw new DecodeError(`The type of key must be ${acceptableTypes} but got ${typeof object}`);
}
if (object === "__proto__") {
if (!this.useMap && object === "__proto__") {
throw new DecodeError("The key __proto__ is not allowed");
}

Expand All @@ -584,7 +605,11 @@ export class Decoder<ContextType = undefined> {
} else {
// it must be `state.type === State.MAP_VALUE` here

state.map[state.key!] = object;
if (this.useMap) {
(state.map as Map<MapKeyType, unknown>).set(state.key!, object);
} else {
(state.map as Record<string, unknown>)[state.key as string] = object;
}
state.readCount++;

if (state.readCount === state.size) {
Expand Down Expand Up @@ -650,7 +675,7 @@ export class Decoder<ContextType = undefined> {
}

private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
if (!this.useRawBinaryStrings || this.stateIsMapKey()) {
if (!this.useRawBinaryStrings || (!this.useMap && this.stateIsMapKey())) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I read this right, does it mean if you are using maps and you set useRawBinaryStrings then it will use a uint8array for the map keys? I wonder if we should allow that to be switchable?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct, and your suggestion makes sense

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added two options for this in 7342b02

return this.decodeUtf8String(byteLength, headerOffset);
}
return this.decodeBinary(byteLength, headerOffset);
Expand Down
58 changes: 48 additions & 10 deletions src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -321,8 +321,10 @@ export class Encoder<ContextType = undefined> {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
this.encodeBigInt(object);
} else if (object instanceof Map) {
this.encodeMap(object, depth);
} else if (typeof object === "object") {
this.encodeMap(object as Record<string, unknown>, depth);
this.encodeMapObject(object as Record<string, unknown>, depth);
} else {
// symbol, function and other special object come here unless extensionCodec handles them.
throw new Error(`Unrecognized object: ${Object.prototype.toString.apply(object)}`);
Expand Down Expand Up @@ -371,25 +373,51 @@ export class Encoder<ContextType = undefined> {
}
}

private countWithoutUndefined(object: Record<string, unknown>, keys: ReadonlyArray<string>): number {
private countWithoutUndefined(map: Map<unknown, unknown>, keys: ReadonlyArray<unknown>): number {
let count = 0;

for (const key of keys) {
if (object[key] !== undefined) {
if (map.get(key) !== undefined) {
count++;
}
}

return count;
}

private encodeMap(object: Record<string, unknown>, depth: number) {
const keys = Object.keys(object);
private sortMapKeys(keys: Array<unknown>): Array<unknown> {
const numericKeys: Array<number | bigint> = [];
const stringKeys: Array<string> = [];
const binaryKeys: Array<Uint8Array> = [];
for (const key of keys) {
if (typeof key === "number" || typeof key === "bigint") {
numericKeys.push(key);
} else if (typeof key === "string") {
stringKeys.push(key);
} else if (ArrayBuffer.isView(key)) {
binaryKeys.push(ensureUint8Array(key));
} else {
throw new Error(`Unrecognized map key type: ${Object.prototype.toString.apply(key)}`);
}
}
numericKeys.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); // Avoid using === to compare numbers and bigints
stringKeys.sort();
binaryKeys.sort(compareUint8Arrays);
// At the moment this arbitrarily orders the keys as numeric, string, binary
return (numericKeys as Array<unknown>).concat(stringKeys).concat(binaryKeys);
}

private encodeMapObject(object: Record<string, unknown>, depth: number) {
this.encodeMap(new Map<string, unknown>(Object.entries(object)), depth);
}

private encodeMap(map: Map<unknown, unknown>, depth: number) {
let keys = Array.from(map.keys());
if (this.sortKeys) {
keys.sort();
keys = this.sortMapKeys(keys);
}

const size = this.ignoreUndefined ? this.countWithoutUndefined(object, keys) : keys.length;
const size = this.ignoreUndefined ? this.countWithoutUndefined(map, keys) : keys.length;

if (size < 16) {
// fixmap
Expand All @@ -407,10 +435,20 @@ export class Encoder<ContextType = undefined> {
}

for (const key of keys) {
const value = object[key];
const value = map.get(key);

if (!(this.ignoreUndefined && value === undefined)) {
this.encodeString(key);
if (typeof key === "string") {
this.encodeString(key);
} else if (typeof key === "number") {
this.encodeNumber(key);
} else if (typeof key === "bigint") {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else {
throw new Error(`Unrecognized map key type: ${Object.prototype.toString.apply(key)}`);
}
this.doEncode(value, depth + 1);
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ export function createDataView(buffer: ArrayLike<number> | ArrayBufferView | Arr
const bufferView = ensureUint8Array(buffer);
return new DataView(bufferView.buffer, bufferView.byteOffset, bufferView.byteLength);
}

export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
const length = Math.min(a.length, b.length);
for (let i = 0; i < length; i++) {
const diff = a[i]! - b[i]!;
if (diff !== 0) {
return diff;
}
}
return a.length - b.length;
}
30 changes: 15 additions & 15 deletions test/codec-bigint.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,24 +253,24 @@ describe("codec BigInt", () => {
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

context("native", () => {
Expand Down
Loading
Loading