From 06056705e5be71ba1d31cf49feed270ecf4fcfa0 Mon Sep 17 00:00:00 2001 From: "Rob Moore (MakerX)" Date: Sat, 27 Jan 2024 22:44:05 +0800 Subject: [PATCH] feat: Added `useRawBinaryStrings` option to Decoder to allow override of default UTF-8 behaviour --- .vscode/settings.json | 14 +++++----- README.md | 28 +++++++++++-------- src/Decoder.ts | 27 +++++++++++++++--- test/decode-raw-strings.test.ts | 49 +++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+), 23 deletions(-) create mode 100644 test/decode-raw-strings.test.ts diff --git a/.vscode/settings.json b/.vscode/settings.json index 30b6d8a..60720d1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,13 +2,13 @@ "typescript.tsdk": "node_modules/typescript/lib", "files.eol": "\n", "editor.tabSize": 2, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "editor.formatOnSave": false, "editor.codeActionsOnSave": { - "source.fixAll.eslint": true + "source.fixAll.eslint": true, + "source.fixAll": "always" }, - "cSpell.words": [ - "instanceof", - "tsdoc", - "typeof", - "whatwg" - ] + "cSpell.words": ["instanceof", "tsdoc", "typeof", "whatwg"], + "mochaExplorer.files": "test/**/*.test.{ts,js}", + "mochaExplorer.require": ["ts-node/register", "tsconfig-paths/register"] } diff --git a/README.md b/README.md index 5cebb1f..0740b6a 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,7 @@ Name|Type|Default extensionCodec | ExtensionCodec | `ExtensionCodec.defaultCodec` context | user-defined | - useBigInt64 | boolean | false +useRawBinaryStrings | boolean | false maxDepth | number | `100` initialBufferSize | number | `2048` sortKeys | boolean | false @@ -121,6 +122,8 @@ forceFloat32 | boolean | false forceIntegerToFloat | boolean | false ignoreUndefined | boolean | false +To skip UTF-8 decoding of strings, `useRawBinaryStrings` can be set to `true`. In this case, strings are decoded into `Uint8Array`. 
+ ### `decode(buffer: ArrayLike | BufferSource, options?: DecoderOptions): unknown` It decodes `buffer` that includes a MessagePack-encoded object, and returns the decoded object typed `unknown`. @@ -498,18 +501,19 @@ null, undefined|nil|null (*1) boolean (true, false)|bool family|boolean (true, false) number (53-bit int)|int family|number number (64-bit float)|float family|number -string|str family|string -ArrayBufferView |bin family|Uint8Array (*2) +string|str family|string (*2) +ArrayBufferView |bin family|Uint8Array (*3) Array|array family|Array -Object|map family|Object (*3) -Date|timestamp ext family|Date (*4) -bigint|N/A|N/A (*5) +Object|map family|Object (*4) +Date|timestamp ext family|Date (*5) +bigint|N/A|N/A (*6) -* *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null` -* *2 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array` -* *3 In handling `Object`, it is regarded as `Record` in terms of TypeScript -* *4 MessagePack timestamps may have nanoseconds, which will lost when it is decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec. -* *5 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it. +* *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`. +* *2 If you'd like to skip UTF-8 decoding of strings, set `useRawBinaryStrings: true`. In this case, strings are decoded into `Uint8Array`. +* *3 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`. +* *4 In handling `Object`, it is regarded as `Record` in terms of TypeScript. +* *5 MessagePack timestamps may have nanoseconds, which will be lost when it is decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec. 
+* *6 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it. If you set `useBigInt64: true`, the following mapping is used: @@ -519,7 +523,7 @@ null, undefined|nil|null boolean (true, false)|bool family|boolean (true, false) **number (32-bit int)**|int family|number **number (except for the above)**|float family|number -**bigint**|int64 / uint64|bigint (*6) +**bigint**|int64 / uint64|bigint (*7) string|str family|string ArrayBufferView |bin family|Uint8Array Array|array family|Array @@ -527,7 +531,7 @@ Object|map family|Object Date|timestamp ext family|Date -* *6 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined. +* *7 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined. ## Prerequisites diff --git a/src/Decoder.ts b/src/Decoder.ts index eedb0fb..699dd97 100644 --- a/src/Decoder.ts +++ b/src/Decoder.ts @@ -20,6 +20,16 @@ export type DecoderOptions = Readonly< */ useBigInt64: boolean; + /** + * By default, string values will be decoded as UTF-8 strings. However, if this option is true, + * string values will be returned as Uint8Arrays without additional decoding. + * + * This is useful if the strings may contain invalid UTF-8 sequences. + * + * Note that this option only applies to string values, not map keys. Additionally, when + * enabled, raw string length is limited by the maxBinLength option. + */ + useRawBinaryStrings: boolean; /** * Maximum string length. 
* @@ -195,6 +205,7 @@ export class Decoder { private readonly extensionCodec: ExtensionCodecType; private readonly context: ContextType; private readonly useBigInt64: boolean; + private readonly useRawBinaryStrings: boolean; private readonly maxStrLength: number; private readonly maxBinLength: number; private readonly maxArrayLength: number; @@ -215,6 +226,7 @@ export class Decoder { this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined this.useBigInt64 = options?.useBigInt64 ?? false; + this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false; this.maxStrLength = options?.maxStrLength ?? UINT32_MAX; this.maxBinLength = options?.maxBinLength ?? UINT32_MAX; this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX; @@ -399,7 +411,7 @@ export class Decoder { } else { // fixstr (101x xxxx) 0xa0 - 0xbf const byteLength = headByte - 0xa0; - object = this.decodeUtf8String(byteLength, 0); + object = this.decodeString(byteLength, 0); } } else if (headByte === 0xc0) { // nil @@ -451,15 +463,15 @@ export class Decoder { } else if (headByte === 0xd9) { // str 8 const byteLength = this.lookU8(); - object = this.decodeUtf8String(byteLength, 1); + object = this.decodeString(byteLength, 1); } else if (headByte === 0xda) { // str 16 const byteLength = this.lookU16(); - object = this.decodeUtf8String(byteLength, 2); + object = this.decodeString(byteLength, 2); } else if (headByte === 0xdb) { // str 32 const byteLength = this.lookU32(); - object = this.decodeUtf8String(byteLength, 4); + object = this.decodeString(byteLength, 4); } else if (headByte === 0xdc) { // array 16 const size = this.readU16(); @@ -637,6 +649,13 @@ export class Decoder { this.stack.pushArrayState(size); } + private decodeString(byteLength: number, headerOffset: number): string | Uint8Array { + if (!this.useRawBinaryStrings || this.stateIsMapKey()) { + return 
this.decodeUtf8String(byteLength, headerOffset); + } + return this.decodeBinary(byteLength, headerOffset); + } + private decodeUtf8String(byteLength: number, headerOffset: number): string { if (byteLength > this.maxStrLength) { throw new DecodeError( diff --git a/test/decode-raw-strings.test.ts b/test/decode-raw-strings.test.ts new file mode 100644 index 0000000..dd6d7f8 --- /dev/null +++ b/test/decode-raw-strings.test.ts @@ -0,0 +1,49 @@ +import assert from "assert"; +import { encode, decode } from "../src"; +import type { DecoderOptions } from "../src"; + +describe("decode with useRawBinaryStrings specified", () => { + const options = { useRawBinaryStrings: true } satisfies DecoderOptions; + + it("decodes string as binary", () => { + const actual = decode(encode("foo"), options); + const expected = Uint8Array.from([0x66, 0x6f, 0x6f]); + assert.deepStrictEqual(actual, expected); + }); + + it("decodes invalid UTF-8 string as binary", () => { + const invalidUtf8String = Uint8Array.from([ + 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176, + 184, 221, 66, 188, 171, 36, 135, 121, + ]); + const encoded = Uint8Array.from([ + 217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, + 176, 184, 221, 66, 188, 171, 36, 135, 121, + ]); + + const actual = decode(encoded, options); + assert.deepStrictEqual(actual, invalidUtf8String); + }); + + it("decodes object keys as strings", () => { + const actual = decode(encode({ key: "foo" }), options); + const expected = { key: Uint8Array.from([0x66, 0x6f, 0x6f]) }; + assert.deepStrictEqual(actual, expected); + }); + + it("ignores maxStrLength", () => { + const lengthLimitedOptions = { ...options, maxStrLength: 1 } satisfies DecoderOptions; + + const actual = decode(encode("foo"), lengthLimitedOptions); + const expected = Uint8Array.from([0x66, 0x6f, 0x6f]); + assert.deepStrictEqual(actual, expected); + }); + + 
it("respects maxBinLength", () => { + const lengthLimitedOptions = { ...options, maxBinLength: 1 } satisfies DecoderOptions; + + assert.throws(() => { + decode(encode("foo"), lengthLimitedOptions); + }, /max length exceeded/i); + }); +});