Skip to content

Commit

Permalink
Bumping embedded bitjs for native unzipping. CBZs unzip very fast now!
Browse files Browse the repository at this point in the history
  • Loading branch information
codedread committed Jan 27, 2024
1 parent 8d2c86f commit 4161f7a
Show file tree
Hide file tree
Showing 25 changed files with 3,186 additions and 290 deletions.
69 changes: 6 additions & 63 deletions code/bitjs/archive/archive.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
* Copyright(c) 2011 Google Inc.
*/

// TODO(2.0): When up-revving to a major new version, remove this module.

import { UnarchiveAppendEvent, UnarchiveErrorEvent, UnarchiveEvent, UnarchiveEventType,
UnarchiveExtractEvent, UnarchiveFinishEvent, UnarchiveInfoEvent,
UnarchiveProgressEvent, UnarchiveStartEvent, Unarchiver,
UnrarrerInternal, UntarrerInternal, UnzipperInternal,
getUnarchiverInternal } from './decompress-internal.js';
UnarchiveProgressEvent, UnarchiveStartEvent } from './events.js';
import { Unarchiver, Unzipper, Unrarrer, Untarrer, getUnarchiver } from './decompress.js';

export {
UnarchiveAppendEvent,
Expand All @@ -26,65 +27,7 @@ export {
UnarchiveProgressEvent,
UnarchiveStartEvent,
Unarchiver,
Unzipper, Unrarrer, Untarrer, getUnarchiver
}

/**
* All extracted files returned by an Unarchiver will implement
* the following interface:
*/

/**
* @typedef UnarchivedFile
* @property {string} filename
* @property {Uint8Array} fileData
*/

/**
* The goal is to make this testable - send getUnarchiver() an array buffer of
* an archive, call start on the unarchiver, expect the returned result.
*
* Problem: It relies on Web Workers, and that won't work in a nodejs context.
* Solution: Make archive.js very thin, have it feed web-specific things into
* an internal module that is isomorphic JavaScript.
*
* TODO:
* - write unit tests for archive-internal.js that use the nodejs Worker
* equivalent.
* - maybe use @pgriess/node-webworker or @audreyt/node-webworker-threads or
* just node's worker_threads ?
*/

/**
 * Creates a module-type Web Worker for the given script.
 * @param {string} scriptFilename The script the Worker should load.
 * @returns {Worker} The newly constructed Worker.
 */
const createWorkerFn = (scriptFilename) => {
  const workerOptions = { type: 'module' };
  return new Worker(scriptFilename, workerOptions);
};

/**
 * Logs a deprecation warning telling clients to move from archive.js to decompress.js.
 */
function warn() {
  const deprecationNotice =
      'Stop using archive.js and use decompress.js instead. This module will be removed.';
  console.warn(deprecationNotice);
}

// Thin wrappers of unarchivers for clients who want to construct a specific
// unarchiver themselves rather than use getUnarchiver().

/**
 * Deprecated wrapper around UnzipperInternal that supplies createWorkerFn on the
 * caller's behalf.
 */
export class Unzipper extends UnzipperInternal {
  /**
   * Logs the deprecation warning, then forwards to the internal constructor.
   * @param {*} ab The archive buffer, passed through unchanged.
   * @param {*} options Unarchiver options, passed through unchanged.
   */
  constructor(ab, options) {
    warn();
    super(ab, createWorkerFn, options);
  }
}

/**
 * Deprecated wrapper around UnrarrerInternal that supplies createWorkerFn on the
 * caller's behalf.
 */
export class Unrarrer extends UnrarrerInternal {
  /**
   * Logs the deprecation warning, then forwards to the internal constructor.
   * @param {*} ab The archive buffer, passed through unchanged.
   * @param {*} options Unarchiver options, passed through unchanged.
   */
  constructor(ab, options) {
    warn();
    super(ab, createWorkerFn, options);
  }
}

/**
 * Deprecated wrapper around UntarrerInternal that supplies createWorkerFn on the
 * caller's behalf.
 */
export class Untarrer extends UntarrerInternal {
  /**
   * Logs the deprecation warning, then forwards to the internal constructor.
   * @param {*} ab The archive buffer, passed through unchanged.
   * @param {*} options Unarchiver options, passed through unchanged.
   */
  constructor(ab, options) {
    warn();
    super(ab, createWorkerFn, options);
  }
}

/**
 * Deprecated factory method: logs the archive.js deprecation warning, then creates an
 * unarchiver based on the byte signature found in the ArrayBuffer.
 * @param {ArrayBuffer} ab The ArrayBuffer to unarchive. Do not reference this ArrayBuffer
 *     after calling this method: it is marked as Transferable and sent to a Worker thread
 *     once start() is called.
 * @param {Object|string} options An optional object of options, or a string giving the
 *     path to the unarchiver script files.
 * @returns {Unarchiver}
 */
export function getUnarchiver(ab, options = {}) {
  warn();
  const unarchiver = getUnarchiverInternal(ab, createWorkerFn, options);
  return unarchiver;
}
// Deprecation notice: as a top-level statement, this is logged when the module is evaluated.
console.error(`bitjs: Stop importing archive.js, this module will be removed. Import decompress.js instead.`);
78 changes: 78 additions & 0 deletions code/bitjs/archive/common.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
* common.js
*
* Provides common definitions or functionality needed by multiple modules.
*
* Licensed under the MIT License
*
* Copyright(c) 2023 Google Inc.
*/

/**
* @typedef FileInfo An object that is sent to the implementation representing a file to compress.
* @property {string} fileName The name of the file. TODO: Includes the path?
* @property {number} lastModTime The number of ms since the Unix epoch (1970-01-01 at midnight).
* @property {Uint8Array} fileData The bytes of the file.
*/

/**
* @typedef Implementation
* @property {MessagePort} hostPort The port the host uses to communicate with the implementation.
* @property {Function} disconnectFn A function to call when the port has been disconnected.
*/

/**
 * Connects a host to a compress/decompress implementation via MessagePorts. The implementation must
 * have an exported connect() function that accepts a MessagePort. If the runtime supports Workers
 * (e.g. web browsers, deno), imports the implementation inside a Web Worker. Otherwise, it
 * dynamically imports the implementation inside the current JS context (node, bun); in that case
 * the implementation's exported disconnect() function is what the returned disconnectFn calls.
 * @param {string} implFilename The compressor/decompressor implementation filename relative to this
 *     path (e.g. './unzip.js').
 * @returns {Promise<Implementation>} The Promise resolves to the Implementation, which includes the
 *     MessagePort connected to the implementation that the host should use and a function to call
 *     to disconnect the implementation. The Promise rejects if the implementation cannot be
 *     loaded or connected.
 */
export async function getConnectedPort(implFilename) {
  const { port1: hostPort, port2: implPort } = new MessageChannel();

  try {
    if (typeof Worker === 'undefined') {
      // No Worker support: load the implementation into this JS context and hand it its port.
      const implModule = await import(`${implFilename}`);
      await implModule.connect(implPort);
      return {
        hostPort,
        disconnectFn: () => implModule.disconnect(),
      };
    }

    // Worker support: the wrapper script receives the implementation filename along with the
    // transferred port.
    const workerScriptPath = new URL(`./webworker-wrapper.js`, import.meta.url).href;
    const worker = new Worker(workerScriptPath, { type: 'module' });
    worker.postMessage({ implSrc: implFilename }, [implPort]);
    return {
      hostPort,
      disconnectFn: () => worker.postMessage({ disconnect: true }),
    };
  } catch (err) {
    // Nobody will ever use this channel, so do not leave its ports open.
    hostPort.close();
    implPort.close();
    throw err;
  }
}

// Zip-specific things.

// Record signatures. Every one of these 32-bit values has 0x4b50 as its low 16 bits.
export const CENTRAL_FILE_HEADER_SIG = 0x02014b50;
export const LOCAL_FILE_HEADER_SIG = 0x04034b50;
export const DIGITAL_SIGNATURE_SIG = 0x05054b50;
export const END_OF_CENTRAL_DIR_SIG = 0x06054b50;
export const END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50;
export const ARCHIVE_EXTRA_DATA_SIG = 0x08064b50;
export const DATA_DESCRIPTOR_SIG = 0x08074b50;

// Constant for CRC-32 computation (going by its name; it is not used in this module).
export const CRC32_MAGIC_NUMBER = 0xedb88320;

/**
 * The compression methods a zip entry can use. The object is frozen so that the
 * documented read-only contract is actually enforced at runtime.
 * @readonly
 * @enum {number}
 */
export const ZipCompressionMethod = Object.freeze({
  STORE: 0, // Default.
  DEFLATE: 8, // As per http://tools.ietf.org/html/rfc1951.
});
122 changes: 71 additions & 51 deletions code/bitjs/archive/compress.js
Original file line number Diff line number Diff line change
@@ -1,42 +1,36 @@
/**
* compress.js
*
* Provides base functionality for compressing.
*
* Licensed under the MIT License
*
* Copyright(c) 2023 Google Inc.
*/

// NOTE: THIS IS A VERY HACKY WORK-IN-PROGRESS! THE API IS NOT FROZEN! USE AT YOUR OWN RISK!
import { ZipCompressionMethod, getConnectedPort } from './common.js';

// TODO(2.0): Remove this comment.
// NOTE: THIS IS A WORK-IN-PROGRESS! THE API IS NOT FROZEN! USE AT YOUR OWN RISK!

/**
* @typedef FileInfo An object that is sent to the worker to represent a file to zip.
* @typedef FileInfo An object that is sent to the implementation to represent a file to zip.
* @property {string} fileName The name of the file. TODO: Includes the path?
* @property {number} lastModTime The number of ms since the Unix epoch (1970-01-01 at midnight).
* @property {ArrayBuffer} fileData The bytes of the file.
*/

/**
* @readonly
* @enum {number}
* @property {Uint8Array} fileData The bytes of the file.
*/
export const ZipCompressionMethod = {
STORE: 0, // Default.
// DEFLATE: 8,
};

// export const DeflateCompressionMethod = {
// NO_COMPRESSION: 0,
// COMPRESSION_FIXED_HUFFMAN: 1,
// COMPRESSION_DYNAMIC_HUFFMAN: 2,
// }

/**
* Data elements are packed into bytes in order of increasing bit number within the byte,
i.e., starting with the least-significant bit of the byte.
* i.e., starting with the least-significant bit of the byte.
* Data elements other than Huffman codes are packed starting with the least-significant bit of the
data element.
* data element.
* Huffman codes are packed starting with the most-significant bit of the code.
*/

/**
* @typedef CompressorOptions
* @property {string} pathToBitJS A string indicating where the BitJS files are located.
* @property {ZipCompressionMethod} zipCompressionMethod
* @property {DeflateCompressionMethod=} deflateCompressionMethod Only present if
* zipCompressionMethod is set to DEFLATE.
*/

/**
Expand All @@ -51,35 +45,54 @@ export const CompressStatus = {
ERROR: 'error',
};

// TODO: Extend EventTarget and introduce subscribe methods (onProgress, onInsert, onFinish, etc).
// TODO: I think appendFiles() is still a good idea so that all files do not have to live in memory
// at once, but the API is wonky here... re-think it. Maybe something more like a builder?

/**
* A thing that zips files.
* NOTE: THIS IS A VERY HACKY WORK-IN-PROGRESS! THE API IS NOT FROZEN! USE AT YOUR OWN RISK!
* TODO: Make a streaming / event-driven API.
* NOTE: THIS IS A WORK-IN-PROGRESS! THE API IS NOT FROZEN! USE AT YOUR OWN RISK!
* TODO(2.0): Add semantic onXXX methods for an event-driven API.
*/
export class Zipper {
/**
* The client-side port that sends messages to, and receives messages from, the
* compressor implementation.
* @type {MessagePort}
* @private
*/
port_;

/**
* A function to call to disconnect the implementation from the host.
* @type {Function}
* @private
*/
disconnectFn_;

/**
* @param {CompressorOptions} options
*/
constructor(options) {
/**
* The path to the BitJS files.
* @type {string}
* @private
*/
this.pathToBitJS = options.pathToBitJS || '/';

/**
* @type {ZipCompressionMethod}
* @type {CompressorOptions}
* @private
*/
this.zipOptions = options;
this.zipCompressionMethod = options.zipCompressionMethod || ZipCompressionMethod.STORE;
if (!Object.values(ZipCompressionMethod).includes(this.zipCompressionMethod)) {
throw `Compression method ${this.zipCompressionMethod} not supported`;
}

/**
* Private web worker initialized during start().
* @type {Worker}
* @private
*/
this.worker_ = null;
if (this.zipCompressionMethod === ZipCompressionMethod.DEFLATE) {
// As per https://developer.mozilla.org/en-US/docs/Web/API/CompressionStream, NodeJS only
// supports deflate-raw from 21.2.0+ (Nov 2023). https://nodejs.org/en/blog/release/v21.2.0.
try {
new CompressionStream('deflate-raw');
} catch (err) {
throw `CompressionStream with deflate-raw not supported by JS runtime: ${err}`;
}
}

/**
* @type {CompressStatus}
Expand All @@ -100,14 +113,14 @@ export class Zipper {
* @param {boolean} isLastFile
*/
appendFiles(files, isLastFile) {
if (!this.worker_) {
throw `Worker not initialized. Did you forget to call start() ?`;
if (!this.port_) {
throw `Port not initialized. Did you forget to call start() ?`;
}
if (![CompressStatus.READY, CompressStatus.WORKING].includes(this.compressState)) {
throw `Zipper not in the right state: ${this.compressState}`;
}

this.worker_.postMessage({ files, isLastFile });
this.port_.postMessage({ files, isLastFile });
}

/**
Expand All @@ -119,16 +132,19 @@ export class Zipper {
* @returns {Promise<Uint8Array>} A Promise that will contain the entire zipped archive as an array
* of bytes.
*/
start(files, isLastFile) {
async start(files, isLastFile) {
const impl = await getConnectedPort('./zip.js');
this.port_ = impl.hostPort;
this.disconnectFn_ = impl.disconnectFn;
return new Promise((resolve, reject) => {
this.worker_ = new Worker(this.pathToBitJS + `archive/zip.js`);
this.worker_.onerror = (evt) => {
console.log('Worker error: message = ' + evt.message);
throw evt.message;
this.port_.onerror = (evt) => {
console.log('Impl error: message = ' + evt.message);
reject(evt.message);
};
this.worker_.onmessage = (evt) => {

this.port_.onmessage = (evt) => {
if (typeof evt.data == 'string') {
// Just log any strings the worker pumps our way.
// Just log any strings the implementation pumps our way.
console.log(evt.data);
} else {
switch (evt.data.type) {
Expand All @@ -137,6 +153,10 @@ export class Zipper {
break;
case 'finish':
this.compressState = CompressStatus.COMPLETE;
this.port_.close();
this.disconnectFn_();
this.port_ = null;
this.disconnectFn_ = null;
resolve(this.byteArray);
break;
case 'compress':
Expand All @@ -147,7 +167,7 @@ export class Zipper {
};

this.compressState = CompressStatus.READY;
this.appendFiles(files, isLastFile);
this.port_.postMessage({ files, isLastFile, compressionMethod: this.zipCompressionMethod});
});
}

Expand All @@ -162,4 +182,4 @@ export class Zipper {
this.byteArray.set(oldArray);
this.byteArray.set(newBytes, oldArray.byteLength);
}
}
}
Loading

0 comments on commit 4161f7a

Please sign in to comment.