Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve performance for specific grammars with deeply nested closures and build CJS version #25

Merged
merged 6 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
"version": "2.0.5",
"type": "module",
"description": "Alternative JavaScript/TypeScript runtime for ANTLR4",
"main": "dist/antlr4.mjs",
"main": "dist/antlr4.cjs",
"module": "dist/antlr4.mjs",
"types": "dist/index.d.ts",
"repository": "https://github.com/mike-lischke/antlr4ng",
"keywords": [
Expand Down Expand Up @@ -40,18 +41,24 @@
"scripts": {
"prepublishOnly": "npm run build-minified && npm run test",
"tsc": "tsc --watch",
"build": "npm run generate-test-parser && tsc && esbuild ./src/index.js --main-fields=module,main --bundle --outfile=dist/antlr4.mjs --format=esm --sourcemap",
"build-minified": "tsc && npm run generate-test-parser && esbuild ./src/index.js --bundle --outfile=dist/antlr4.mjs --format=esm --sourcemap --minify",
"build": "npm run generate-test-parser && tsc && npm run build-cjs && npm run build-mjs",
"build-minified": "npm run generate-test-parser && tsc && npm run build-cjs-minified && npm run build-mjs-minified",
"build-bundle": "esbuild ./src/index.js --main-fields=module,main --bundle --sourcemap",
"build-mjs": "npm run build-bundle -- --outfile=dist/antlr4.mjs --format=esm",
"build-mjs-minified": "npm run build-mjs -- --minify",
"build-cjs": "npm run build-bundle -- --outfile=dist/antlr4.cjs --format=cjs",
"build-cjs-minified": "npm run build-cjs -- --minify",
"full-test": "npm run test && npm run run-benchmarks",
"test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js --no-coverage",
"generate-test-parser": "cli/index.js -Dlanguage=TypeScript -o tests/benchmarks/generated -visitor -listener -Xexact-output-dir tests/benchmarks/MySQLLexer.g4 tests/benchmarks/MySQLParser.g4",
"generate-xpath-lexer": "cli/index.js -Dlanguage=TypeScript -o src/tree/xpath/generated -no-visitor -no-listener -Xexact-output-dir src/tree/xpath/XPathLexer.g4",
"generate-test-parser": "node cli/index.js -Dlanguage=TypeScript -o tests/benchmarks/generated -visitor -listener -Xexact-output-dir tests/benchmarks/MySQLLexer.g4 tests/benchmarks/MySQLParser.g4",
"generate-xpath-lexer": "node cli/index.js -Dlanguage=TypeScript -o src/tree/xpath/generated -no-visitor -no-listener -Xexact-output-dir src/tree/xpath/XPathLexer.g4",
"run-benchmarks": "node --no-warnings --experimental-vm-modules --loader ts-node/esm tests/benchmarks/run-benchmarks.ts",
"profile benchmarks": "node --no-warnings --experimental-vm-modules --prof --loader ts-node/esm tests/benchmarks/run-benchmarks.ts",
"process profile tick file": " node --prof-process isolate-0x130008000-75033-v8.log > processed.txt"
},
"exports": {
"types": "./dist/index.d.ts",
"default": "./dist/antlr4.mjs"
"require": "./dist/antlr4.cjs",
"import": "./dist/antlr4.mjs"
}
}
23 changes: 21 additions & 2 deletions src/CharStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,27 @@ import { Token } from "./Token.js";
import { Interval } from "./misc/Interval.js";
import { IntStream } from "./IntStream.js";

// TODO: CharStream should be an interface, not a class.
export class CharStream implements IntStream {
/**
* An {@link IntStream} that can additionally be reset and queried for
* substrings of its content.
*/
export interface CharStream extends IntStream {
/**
* Reset the stream so that it's in the same state it was
* when the object was created *except* the data array is not
* touched.
*/
reset(): void;
/**
* Get a substring from the stream, from start to stop (both inclusive).
* @param start Start index
* @param stop Stop index
*/
getText(start: number, stop: number): string;
/**
* Get a substring from the stream covering the specified interval (inclusive).
* @param interval The interval to extract.
*/
getText(interval: Interval): string;
}

export class CharStreamImpl implements CharStream {
public name = "";
public index = 0;

Expand Down
4 changes: 2 additions & 2 deletions src/CharStreams.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

/* eslint-disable jsdoc/require-param, jsdoc/require-returns */

import { CharStream } from "./CharStream.js";
import { CharStream, CharStreamImpl } from "./CharStream.js";

/**
* Utility functions to create Character streams from various sources.
Expand All @@ -21,6 +21,6 @@ import { CharStream } from "./CharStream.js";
export class CharStreams {
    /**
     * Creates a CharStream from a string.
     *
     * `CharStream` is an interface, so the concrete `CharStreamImpl` is instantiated here; callers only
     * see the interface type.
     *
     * @param str The text the stream is created from.
     * @returns A new stream over `str`.
     */
    public static fromString(str: string): CharStream {
        return new CharStreamImpl(str);
    }
}
123 changes: 73 additions & 50 deletions src/misc/HashMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,92 +9,115 @@ import { EqualsFunction, HashFunction } from "./HashSet.js";

interface Entry<Key extends IComparable, Value> { key: Key, value: Value; }

export class HashMap<TKey extends IComparable, TValue> {

    /**
     * Threshold for using hashing and searching the bucket instead of a linear search.
     * Maps with fewer entries than this are searched linearly over all keys.
     * Set to 0 to disable linear search and always use the hash function.
     */
    public static LINEAR_SEARCH_THRESHOLD = 5;

    /** Stored values in insertion order; `#values[i]` is the value for `#keys[i]`. */
    #values: TValue[] = [];

    /** Stored keys in insertion order. */
    #keys: TKey[] = [];

    /** Hash buckets: maps a hash code to the indices (into `#keys`/`#values`) of all entries with that hash. */
    #data: Record<string, number[]> = {};

    private hashFunction: HashFunction;
    private equalsFunction: EqualsFunction;

    /**
     * @param hashFunction Computes the hash code of a key. Defaults to `standardHashCodeFunction`.
     * @param equalsFunction Decides whether two keys are equal. Defaults to `standardEqualsFunction`.
     */
    public constructor(hashFunction?: HashFunction, equalsFunction?: EqualsFunction) {
        this.hashFunction = hashFunction ?? standardHashCodeFunction;
        this.equalsFunction = equalsFunction ?? standardEqualsFunction;
    }

    /**
     * Associates `value` with `key`, replacing the value of an equal key if one is already stored.
     *
     * @param key The key to insert or update.
     * @param value The value to store.
     * @returns The previously stored value if an equal key existed, otherwise `value`.
     */
    public set(key: TKey, value: TValue): TValue {
        const useLinearSearch = this.#keys.length < HashMap.LINEAR_SEARCH_THRESHOLD;
        if (useLinearSearch) {
            const existingIndex = this.#keys.findIndex((candidate) => this.equalsFunction(key, candidate));
            if (existingIndex >= 0) {
                return this.replaceEntry(existingIndex, value);
            }
        }

        const hashKey = this.hashFunction(key);
        const bucket: number[] | undefined = this.#data[hashKey];

        if (bucket && !useLinearSearch) {
            const existingIndex = bucket.find((entryIndex) => this.equalsFunction(key, this.#keys[entryIndex]));
            if (existingIndex !== undefined) {
                return this.replaceEntry(existingIndex, value);
            }
        }

        // The key is new. Always append to an existing bucket, also while the map is still small enough for
        // linear search. Overwriting the bucket would drop earlier entries with the same hash, making them
        // unreachable as soon as lookups switch to the hashed path.
        const index = this.addEntry(key, value);
        if (bucket) {
            bucket.push(index);
        } else {
            this.#data[hashKey] = [index];
        }

        return value;
    }

    /**
     * Appends a new key/value pair to the backing arrays.
     *
     * @returns The index at which the pair was stored.
     */
    private addEntry(key: TKey, value: TValue): number {
        // Array.push returns the new length, so the entry's index is one less.
        const newLength = this.#keys.push(key);
        this.#values.push(value);

        return newLength - 1;
    }

    /**
     * Overwrites the value stored at `index`.
     *
     * @returns The value that was stored there before.
     */
    private replaceEntry(index: number, value: TValue): TValue {
        const oldValue = this.#values[index];
        this.#values[index] = value;

        return oldValue;
    }

    /**
     * Locates the entry whose key equals `key`.
     *
     * @returns The index into `#keys`/`#values`, or -1 if no equal key is stored.
     */
    private findEntryIndex(key: TKey): number {
        if (this.#keys.length === 0) {
            // Nothing stored: avoid computing a hash code at all.
            return -1;
        }

        if (this.#keys.length < HashMap.LINEAR_SEARCH_THRESHOLD) {
            return this.#keys.findIndex((candidate) => this.equalsFunction(key, candidate));
        }

        const bucket: number[] | undefined = this.#data[this.hashFunction(key)];
        if (!bucket) {
            return -1;
        }

        return bucket.find((entryIndex) => this.equalsFunction(key, this.#keys[entryIndex])) ?? -1;
    }

    /**
     * @param key The key to look for.
     * @returns True if a key equal to `key` is stored in this map.
     */
    public containsKey(key: TKey): boolean {
        return this.findEntryIndex(key) >= 0;
    }

    /**
     * @param key The key to look up.
     * @returns The value stored for a key equal to `key`, or null if there is none.
     */
    public get(key: TKey): TValue | null {
        const index = this.findEntryIndex(key);

        return index >= 0 ? this.#values[index] : null;
    }

    /**
     * @returns A newly created array with one `{ key, value }` object per entry, in insertion order.
     */
    public entries(): Array<Entry<TKey, TValue>> {
        return this.#values.map((value, index) => ({ key: this.#keys[index], value }));
    }

    /**
     * @returns The keys in insertion order. This is the internal array, not a copy: do not modify it.
     */
    public getKeys(): TKey[] {
        return this.#keys;
    }

    /**
     * @returns The values in insertion order. This is the internal array, not a copy: do not modify it.
     */
    public getValues(): TValue[] {
        return this.#values;
    }

    /**
     * @returns All entries formatted as `key: value`, joined with ", ".
     */
    public toString(): string {
        return this.#values.map((value, index) => `${this.#keys[index]}: ${value}`).join(", ");
    }

    /**
     * @returns The number of entries in this map.
     */
    public get length(): number {
        return this.#values.length;
    }
}
75 changes: 45 additions & 30 deletions src/misc/HashSet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,70 +10,85 @@ export type HashFunction = (a: string | IComparable) => number;
export type EqualsFunction = (a: IComparable | null, b: unknown) => boolean;

export class HashSet<T extends IComparable> {

    /**
     * Threshold for using hashing and searching the bucket instead of a linear search.
     * Sets with fewer elements than this are searched linearly over all values.
     * Set to 0 to disable linear search and always use the hash function.
     */
    public static LINEAR_SEARCH_THRESHOLD = 5;

    /** Stored elements in insertion order. */
    #values: T[] = [];

    /** Hash buckets: maps a hash code to the indices (into `#values`) of all elements with that hash. */
    #data: Record<string, number[]> = {};

    private hashFunction: HashFunction;
    private equalsFunction: EqualsFunction;

    /**
     * @param hashFunction Computes the hash code of an element. Defaults to `standardHashCodeFunction`.
     * @param equalsFunction Decides whether two elements are equal. Defaults to `standardEqualsFunction`.
     */
    public constructor(hashFunction?: HashFunction, equalsFunction?: EqualsFunction) {
        this.hashFunction = hashFunction ?? standardHashCodeFunction;
        this.equalsFunction = equalsFunction ?? standardEqualsFunction;
    }

    /**
     * Adds `value` to the set unless an equal element is already stored.
     *
     * @param value The element to add.
     * @returns The already stored equal element if there is one, otherwise `value`.
     */
    public add(value: T): T {
        const useLinearSearch = this.#values.length < HashSet.LINEAR_SEARCH_THRESHOLD;
        if (useLinearSearch) {
            const existing = this.#values.find((candidate) => this.equalsFunction(value, candidate));
            if (existing !== undefined) {
                return existing;
            }
        }

        const key = this.hashFunction(value);
        const bucket: number[] | undefined = this.#data[key];

        if (bucket && !useLinearSearch) {
            const existingIndex = bucket.find((entryIndex) => this.equalsFunction(value, this.#values[entryIndex]));
            if (existingIndex !== undefined) {
                return this.#values[existingIndex];
            }
        }

        // The value is new: store it exactly once (Array.push returns the new length) and register its index.
        // Always append to an existing bucket, also while the set is still small enough for linear search.
        // Overwriting the bucket would drop earlier elements with the same hash from hashed lookups.
        const index = this.#values.push(value) - 1;
        if (bucket) {
            bucket.push(index);
        } else {
            this.#data[key] = [index];
        }

        return value;
    }

    /**
     * @param value The element to look for.
     * @returns True if an element equal to `value` is stored in this set.
     */
    public has(value: T): boolean {
        return this.get(value) != null;
    }

    /**
     * @param value The element to look up.
     * @returns The stored element that equals `value`, or null if there is none.
     */
    public get(value: T): T | null {
        if (this.#values.length === 0) {
            // Nothing stored: avoid computing a hash code at all.
            return null;
        }

        if (this.#values.length < HashSet.LINEAR_SEARCH_THRESHOLD) {
            return this.#values.find((candidate) => this.equalsFunction(value, candidate)) ?? null;
        }

        const bucket: number[] | undefined = this.#data[this.hashFunction(value)];
        if (bucket) {
            const index = bucket.find((entryIndex) => this.equalsFunction(value, this.#values[entryIndex]));
            if (index !== undefined) {
                return this.#values[index];
            }
        }

        return null;
    }

    /**
     * @returns The elements in insertion order. This is the internal array, not a copy: do not modify it.
     */
    public values(): T[] {
        return this.#values;
    }

    /**
     * @returns The elements formatted by `arrayToString`.
     */
    public toString(): string {
        return arrayToString(this.#values);
    }

    /**
     * @returns The number of elements in this set.
     */
    public get length(): number {
        return this.#values.length;
    }
}
2 changes: 1 addition & 1 deletion tests/CharStream.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* can be found in the LICENSE.txt file in the project root.
*/

import { CharStream, IntStream, Interval } from "../src/index.js";
import { CharStreamImpl as CharStream, IntStream, Interval } from "../src/index.js";

const unicodeInput = "Hello 👋, World! 😁";

Expand Down
4 changes: 2 additions & 2 deletions tests/rewriter/TokenStreamRewriter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { Calc } from "./generatedCode/calc.js";
* @returns A new TokenStreamRewriter instance.
*/
const getRewriter = (lexerClass: typeof antlr4.Lexer, input: string) => {
const chars = new antlr4.CharStream(input);
const chars = antlr4.CharStreams.fromString(input);

// @ts-ignore
const lexer: antlr4.Lexer = new lexerClass(chars);
Expand Down Expand Up @@ -403,7 +403,7 @@ describe("TokenStreamRewriter", () => {

it("throws an error if second replace operation overlaps the first one on the left", () => {
// Arrange
const chars = new antlr4.CharStream("abcccba");
const chars = antlr4.CharStreams.fromString("abcccba");
const lexer = new ABC(chars);
const tokens = new antlr4.CommonTokenStream(lexer);
tokens.fill();
Expand Down