diff --git a/.eslintrc.json b/.eslintrc.json index bf0fd38..216ebb4 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -13,6 +13,7 @@ ], "ignorePatterns": [ "src/**/*.js", + "src/tree/xpath/XPathLexer.ts", "spec/**/*.*js", "dist/**/*", "cli/index.js", @@ -536,7 +537,7 @@ ], "@typescript-eslint/no-explicit-any": "error", "@typescript-eslint/no-parameter-properties": "off", - "@typescript-eslint/no-use-before-define": "error", + "@typescript-eslint/no-use-before-define": "off", "@typescript-eslint/no-unsafe-assignment": "off", // TODO: enable "@typescript-eslint/no-unsafe-member-access": "off", // TODO: enable "@typescript-eslint/no-unsafe-call": "error", // TODO: enable @@ -610,6 +611,7 @@ ], "@typescript-eslint/restrict-template-expressions": "off", "@typescript-eslint/restrict-plus-operands": "off", + "@typescript-eslint/no-base-to-string": "off", "jsdoc/check-alignment": "error", "jsdoc/check-indentation": "off", "jsdoc/require-param-type": "off", @@ -620,6 +622,7 @@ { "startLines": 1 } - ] + ], + "jsdoc/no-undefined-types": "off" } } diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index e3c9829..6db26c8 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -5,7 +5,7 @@ name: Build & Test on: push: - branches: [ master ] + branches: [ master, ts-migration ] pull_request: branches: [ master ] diff --git a/.vscode/launch.json b/.vscode/launch.json index a723c75..ed729ef 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,8 +8,9 @@ "type": "node", "request": "launch", "name": "Run current Jest test", - "runtimeExecutable": null, + "runtimeExecutable": "node", "runtimeArgs": [ + "--experimental-vm-modules", "${workspaceRoot}/node_modules/.bin/jest", "${fileBasenameNoExtension}.ts", "--no-coverage", @@ -38,6 +39,7 @@ "ts-node/esm", "tests/benchmarks/run-benchmarks.ts", ], + "sourceMaps": true, } ] } diff --git a/ReadMe.md b/ReadMe.md index 5fc9ad4..c7f2b49 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -6,23 +6,18 @@ # TypeScript Runtime for ANTLR 4 -This package is a fork of the official ANTLR4 JavaScript runtime (with its TypeScript additions), with the following changes: +This package is a fork of the official ANTLR4 JavaScript runtime and has been fully transformed to TypeScript. Other improvements are: -- Much improved TypeScript type definitions. - XPath implementation. - Vocabulary implementation. - Complete Interval implementation. - Parser and lexer interpreters. -- A couple of bug fixes. -- Consistent formatting (indentation, semicolons, spaces, etc.). -- Project folder structure is now similar to the Java runtime. -- Numerous smaller fixes (`null` instead of `undefined` and others). +- Numerous bug fixes and other changes. - Smaller node package (no test specs or other unnecessary files). - No CommonJS support anymore (ESM only). No differentiation between node and browser environments. -- Build is now based on esbuild. - Includes the `antlr4ng-cli` tool to generate parser files compatible with this runtime. This tool uses a custom build of the ANTLR4 tool. -It is (mostly) a drop-in replacement of the `antlr4` package, and can be used as such. For more information about ANTLR see www.antlr.org. Read more details about the [JavaScript](https://github.com/antlr/antlr4/blob/master/doc/javascript-target.md) and [TypeScript](https://github.com/antlr/antlr4/blob/master/doc/typescript-target.md) targets at the provided links, but keep in mind that this documentation applies to the original JS/TS target. 
+This package is a blend of the original JS implementation and antlr4ts, a TypeScript implementation of the ANTLR4 runtime that has since been abandoned. It tries to keep the best of both worlds, while following the Java runtime as closely as possible. It's a bit slower than the JS runtime, but faster than antlr4ts.
 
 ## Installation
 
@@ -39,9 +34,22 @@ npm install --save-dev antlr4ng-cli
 ```
 See [its readme](./cli/ReadMe.md) for more information.
 
+If you come from one of the other JS/TS runtimes, you may have to adjust your code a bit. The antlr4ng package exposes the Java nullability of certain members more strictly. This requires that you either use the non-null assertion operator to force the compiler to accept your code, or that you check for nullability before accessing a member. The latter is the recommended way, as it is safer.
+
+Additionally, some members have been renamed to more TypeScript-like names (e.g. `Parser._ctx` is now `Parser.context`). The following table shows the most important changes:
+
+| Old Name | New Name |
+| -------- | -------- |
+| Parser._ctx | Parser.context |
+| Parser._errHandler | Parser.errorHandler |
+| Parser._input | Parser.inputStream |
+| Parser._interp | Parser.interpreter |
+
+The package requires ES2022 or newer for features like static initialization blocks in classes and private fields (`#field`). It is recommended to use the latest TypeScript version.
+
 ## Benchmarks
 
-This runtime is constantly monitored for performance regressions. The following table shows the results of the benchmarks run on last release:
+This runtime is monitored for performance regressions. The following table shows the results of the benchmarks run on the last release:
 
 | Test | Cold Run | Warm Run |
 | ---- | -------- | -------- |
@@ -50,11 +58,11 @@ This runtime is constantly monitored for performance regressions. The following
 | Large Inserts | 11022 ms | 10616 ms |
 | Total | 20599 ms | 10978 ms |
 
-The benchmarks consist of a set of query files, which are parsed by a MySQL parser. The query collection file contains more than 900 MySQL queries of all kinds, from very simple to complex stored procedures, including some deeply nested select queries that can easily exhaust available stack space. The minimum MySQL server version used was 8.0.0.
+The benchmarks consist of a set of query files, which are parsed by a MySQL parser. The query collection file contains more than 900 MySQL queries of all kinds, from very simple to complex stored procedures, including some deeply nested select queries that can easily exhaust the available stack space (in certain situations, such as parsing in a thread with default stack size). The minimum MySQL server version used was 8.0.0.
 
-The large binary inserts file contains only a few dozen queries, but they are really large with deep recursions, stressing so the prediction engine of the parser. Additionally, one query contains binary (image) data which contains input characters from the whole UTF-8 range.
+The large binary inserts file contains only a few dozen queries, but they are really large with deep recursions, so they stress the prediction engine of the parser. In addition, one query contains binary (image) data containing input characters from the entire UTF-8 range.
 
-The example file is a copy of the largest test file in [this repository](https://github.com/antlr/grammars-v4/tree/master/sql/mysql/Positive-Technologies/examples), and is known to be very slow to parse with other parsers, but the one used here.
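To illustrate the nullability and renaming changes described in the migration notes above, here is a small before/after sketch. It is illustrative only: `MyParser` and its token stream are hypothetical, and the nullable typing of `inputStream` is an assumption made for the sake of the example.

```typescript
// Before (antlr4 / antlr4ts): const stream = parser._input;
// After (antlr4ng): the member is renamed and may be typed as nullable.
const parser = new MyParser(tokens);
const stream = parser.inputStream;

// Option 1: non-null assertion, when you know the stream was set.
console.log(stream!.index);

// Option 2 (recommended): explicit null check.
if (stream !== null) {
    console.log(stream.index);
}
```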
+The example file is a copy of the largest test file in [this repository](https://github.com/antlr/grammars-v4/tree/master/sql/mysql/Positive-Technologies/examples), and is known to be very slow to parse with other MySQL grammars. The one used here, however, is fast. ## Release Notes diff --git a/cli/antlr4-4.13.2-SNAPSHOT-complete.jar b/cli/antlr4-4.13.2-SNAPSHOT-complete.jar index 3d01036..ced93db 100644 Binary files a/cli/antlr4-4.13.2-SNAPSHOT-complete.jar and b/cli/antlr4-4.13.2-SNAPSHOT-complete.jar differ diff --git a/cspell.json b/cspell.json index 5e638d5..06c89db 100644 --- a/cspell.json +++ b/cspell.json @@ -13,6 +13,7 @@ "rdbms", "runtimes", "sakila", + "unpredicated", "whitespaces" ], "ignoreWords": [ @@ -20,16 +21,26 @@ "Dlanguage", "Grosch", "Harwell", + "Hashable", + "IATN", + "Nondisjoint", + "Preds", + "Sethi", + "Ullman", "Wirth", "Xexact", "bitrix", "interp", "localctx", + "longlong", "nbits", + "opnds", "outfile", "parentctx", + "prec", "precpred", "recog", + "semctx", "sempred", "ttype" ], diff --git a/package.json b/package.json index feca28d..c83f1a9 100644 --- a/package.json +++ b/package.json @@ -37,22 +37,20 @@ "typescript": "5.2.2" }, "scripts": { - "prepublishOnly": "npm run build && npm run test", - "build": "npm run generate-test-parser && esbuild ./src/index.js --bundle --outfile=dist/antlr4.mjs --format=esm --sourcemap=external --minify", + "prepublishOnly": "npm run build-minified && npm run test", + "tsc": "tsc --watch", + "build": "npm run generate-test-parser && esbuild ./src/index.js --bundle --outfile=dist/antlr4.mjs --format=esm --sourcemap", + "build-minified": "npm run generate-test-parser && esbuild ./src/index.js --bundle --outfile=dist/antlr4.mjs --format=esm --sourcemap --minify", "full-test": "npm run test && npm run run-benchmarks", "test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js --no-coverage", - "lint": "eslint src/", "generate-test-parser": "cli/index.js -Dlanguage=TypeScript -o tests/benchmarks/generated -visitor -listener -Xexact-output-dir tests/benchmarks/MySQLLexer.g4 tests/benchmarks/MySQLParser.g4", - "run-benchmarks": "node --no-warnings --experimental-vm-modules --loader ts-node/esm tests/benchmarks/run-benchmarks.ts" + "generate-xpath-lexer": "cli/index.js -Dlanguage=TypeScript -o src/tree/xpath/generated -no-visitor -no-listener -Xexact-output-dir src/tree/xpath/XPathLexer.g4", + "run-benchmarks": "node --no-warnings --experimental-vm-modules --loader ts-node/esm tests/benchmarks/run-benchmarks.ts", + "profile benchmarks": "node --no-warnings --experimental-vm-modules --prof --loader ts-node/esm tests/benchmarks/run-benchmarks.ts", + "process profile tick file": " node --prof-process isolate-0x130008000-75033-v8.log > processed.txt" }, "exports": { - "types": "./src/index.d.ts", + "types": "./src/index.ts", "default": "./dist/antlr4.mjs" - }, - "babel": { - "presets": [ - "@babel/preset-env" - ], - "targets": "defaults" } } diff --git a/src/ANTLRErrorListener.ts b/src/ANTLRErrorListener.ts new file mode 100644 index 0000000..804c052 --- /dev/null +++ b/src/ANTLRErrorListener.ts @@ -0,0 +1,180 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import { Parser } from "./Parser.js"; +import { RecognitionException } from "./RecognitionException.js"; +import { Recognizer } from "./Recognizer.js"; +import { ATNConfigSet } from "./atn/ATNConfigSet.js"; +import { DFA } from "./dfa/DFA.js"; +import { ATNSimulator } from "./atn/ATNSimulator.js"; +import { Token } from "./Token.js"; +import { BitSet } from "./misc/BitSet.js"; + +/** How to emit recognition errors. */ +export interface ANTLRErrorListener { + /** + * Upon syntax error, notify any interested parties. This is not how to + * recover from errors or compute error messages. {@link ANTLRErrorStrategy} + * specifies how to recover from syntax errors and how to compute error + * messages. This listener's job is simply to emit a computed message, + * though it has enough information to create its own message in many cases. + * + *
The {@link RecognitionException} is non-null for all syntax errors except + * when we discover mismatched token errors that we can recover from + * in-line, without returning from the surrounding rule (via the single + * token insertion and deletion mechanism).
+     *
+     * @param recognizer
+     *        What parser got the error. From this
+     *        object, you can access the context as well
+     *        as the input stream.
+     * @param offendingSymbol
+     *        The offending token in the input token
+     *        stream, unless recognizer is a lexer (then it's null). If
+     *        no viable alternative error, {@code e} has token at which we
+     *        started production for the decision.
+     * @param line
+     *        The line number in the input where the error occurred.
+     * @param charPositionInLine
+     *        The character position within that line where the error occurred.
+     * @param msg
+     *        The message to emit.
+     * @param e
+     *        The exception generated by the parser that led to
+     *        the reporting of an error. It is null in the case where
+     *        the parser was able to recover in line without exiting the
+     *        surrounding rule.
+     */
+    syntaxError<S extends Token, T extends ATNSimulator>(recognizer: Recognizer<T>,
+        offendingSymbol: S | null,
+        line: number,
+        charPositionInLine: number,
+        msg: string,
+        e: RecognitionException | null): void;
+
+    /**
+     * This method is called by the parser when a full-context prediction
+     * results in an ambiguity.
+     *
+     * <p>Each full-context prediction which does not result in a syntax error
+     * will call either {@link #reportContextSensitivity} or
+     * {@link #reportAmbiguity}.</p>
+ * + *When {@code ambigAlts} is not null, it contains the set of potentially + * viable alternatives identified by the prediction algorithm. When + * {@code ambigAlts} is null, use {@link ATNConfigSet#getAlts} to obtain the + * represented alternatives from the {@code configs} argument.
+ * + *When {@code exact} is {@code true}, all of the potentially + * viable alternatives are truly viable, i.e. this is reporting an exact + * ambiguity. When {@code exact} is {@code false}, at least two of + * the potentially viable alternatives are viable for the current input, but + * the prediction algorithm terminated as soon as it determined that at + * least the minimum potentially viable alternative is truly + * viable.
+ * + *When the {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} prediction + * mode is used, the parser is required to identify exact ambiguities so + * {@code exact} will always be {@code true}.
+ * + *This method is not used by lexers.
+     *
+     * @param recognizer the parser instance
+     * @param dfa the DFA for the current decision
+     * @param startIndex the input index where the decision started
+     * @param stopIndex the input index where the ambiguity was identified
+     * @param exact {@code true} if the ambiguity is exactly known, otherwise
+     * {@code false}. This is always {@code true} when
+     * {@link PredictionMode#LL_EXACT_AMBIG_DETECTION} is used.
+     * @param ambigAlts the potentially ambiguous alternatives, or {@code null}
+     * to indicate that the potentially ambiguous alternatives are the complete
+     * set of represented alternatives in {@code configs}
+     * @param configs the ATN configuration set where the ambiguity was
+     * identified
+     */
+    reportAmbiguity(recognizer: Parser,
+        dfa: DFA,
+        startIndex: number,
+        stopIndex: number,
+        exact: boolean,
+        ambigAlts: BitSet | null,
+        configs: ATNConfigSet): void;
+
+    /**
+     * This method is called when an SLL conflict occurs and the parser is about
+     * to use the full context information to make an LL decision.
+     *
+     * <p>If one or more configurations in {@code configs} contain a semantic
+     * predicate, the predicates are evaluated before this method is called. The
+     * subset of alternatives which are still viable after predicates are
+     * evaluated is reported in {@code conflictingAlts}.</p>
+ * + *This method is not used by lexers.
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the SLL conflict occurred + * @param conflictingAlts The specific conflicting alternatives. If this is + * {@code null}, the conflicting alternatives are all alternatives + * represented in {@code configs}. At the moment, conflictingAlts is non-null + * (for the reference implementation, but Sam's optimized version can see this + * as null). + * @param configs the ATN configuration set where the SLL conflict was + * detected + */ + reportAttemptingFullContext(recognizer: Parser, + dfa: DFA, + startIndex: number, + stopIndex: number, + conflictingAlts: BitSet | null, + configs: ATNConfigSet): void; + + /** + * This method is called by the parser when a full-context prediction has a + * unique result. + * + *Each full-context prediction which does not result in a syntax error + * will call either {@link #reportContextSensitivity} or + * {@link #reportAmbiguity}.
+ * + *For prediction implementations that only evaluate full-context + * predictions when an SLL conflict is found (including the default + * {@link ParserATNSimulator} implementation), this method reports cases + * where SLL conflicts were resolved to unique full-context predictions, + * i.e. the decision was context-sensitive. This report does not necessarily + * indicate a problem, and it may appear even in completely unambiguous + * grammars.
+ * + *{@code configs} may have more than one represented alternative if the + * full-context prediction algorithm does not evaluate predicates before + * beginning the full-context prediction. In all cases, the final prediction + * is passed as the {@code prediction} argument.
+ * + *Note that the definition of "context sensitivity" in this method + * differs from the concept in {@link DecisionInfo#contextSensitivities}. + * This method reports all instances where an SLL conflict occurred but LL + * parsing produced a unique result, whether or not that unique result + * matches the minimum alternative in the SLL conflicting set.
+ * + *This method is not used by lexers.
+ * + * @param recognizer the parser instance + * @param dfa the DFA for the current decision + * @param startIndex the input index where the decision started + * @param stopIndex the input index where the context sensitivity was + * finally determined + * @param prediction the unambiguous result of the full-context prediction + * @param configs the ATN configuration set where the unambiguous prediction + * was determined + */ + reportContextSensitivity(recognizer: Parser, + dfa: DFA, + startIndex: number, + stopIndex: number, + prediction: number, + configs: ATNConfigSet): void; +} diff --git a/src/ANTLRErrorStrategy.d.ts b/src/ANTLRErrorStrategy.ts similarity index 98% rename from src/ANTLRErrorStrategy.d.ts rename to src/ANTLRErrorStrategy.ts index ff579b4..168f0b7 100644 --- a/src/ANTLRErrorStrategy.d.ts +++ b/src/ANTLRErrorStrategy.ts @@ -25,7 +25,7 @@ import { Token } from "./Token.js"; * *TODO: what to do about lexers
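For a sense of how the report callbacks of this interface are consumed in practice, here is a minimal listener sketch. It assumes that `BaseErrorListener` (defined elsewhere in this package) provides no-op defaults for all `ANTLRErrorListener` members, so only the ambiguity hook needs overriding; the signature mirrors the declaration above.

```typescript
import { BaseErrorListener } from "./BaseErrorListener.js";
import { Parser } from "./Parser.js";
import { ATNConfigSet } from "./atn/ATNConfigSet.js";
import { DFA } from "./dfa/DFA.js";
import { BitSet } from "./misc/BitSet.js";

/** Logs every ambiguity the prediction engine reports. */
class AmbiguityLogger extends BaseErrorListener {
    public override reportAmbiguity(recognizer: Parser, dfa: DFA, startIndex: number,
        stopIndex: number, exact: boolean, ambigAlts: BitSet | null, configs: ATNConfigSet): void {
        const kind = exact ? "exact" : "inexact";
        console.warn(`${kind} ambiguity at input indexes ${startIndex}..${stopIndex}`);
    }
}
```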
*/ -export declare interface ANTLRErrorStrategy { +export interface ANTLRErrorStrategy { /** * Reset the error handler state for the specified {@code recognizer}. * diff --git a/src/BailErrorStrategy.d.ts b/src/BailErrorStrategy.d.ts deleted file mode 100644 index 1894ff8..0000000 --- a/src/BailErrorStrategy.d.ts +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { DefaultErrorStrategy } from "./DefaultErrorStrategy.js"; - -export declare class BailErrorStrategy extends DefaultErrorStrategy { - public constructor(); - -} diff --git a/src/BailErrorStrategy.js b/src/BailErrorStrategy.ts similarity index 67% rename from src/BailErrorStrategy.js rename to src/BailErrorStrategy.ts index 99fddba..d9e9004 100644 --- a/src/BailErrorStrategy.js +++ b/src/BailErrorStrategy.ts @@ -4,9 +4,14 @@ * can be found in the LICENSE.txt file in the project root. */ +/* eslint-disable jsdoc/require-param */ + import { InputMismatchException } from "./InputMismatchException.js"; -import { ParseCancellationException } from "./ParseCancellationException.js"; +import { ParseCancellationException } from "./misc/ParseCancellationException.js"; import { DefaultErrorStrategy } from "./DefaultErrorStrategy.js"; +import { Parser } from "./Parser.js"; +import { RecognitionException } from "./RecognitionException.js"; +import { ParserRuleContext } from "./ParserRuleContext.js"; /** * This implementation of {@link ANTLRErrorStrategy} responds to syntax errors @@ -32,27 +37,23 @@ import { DefaultErrorStrategy } from "./DefaultErrorStrategy.js"; * * *- * {@code myparser.setErrorHandler(new BailErrorStrategy());}
+ * {@code myParser.setErrorHandler(new BailErrorStrategy());} * - * @see Parser//setErrorHandler(ANTLRErrorStrategy) - * */ + * @see Parser#setErrorHandler(ANTLRErrorStrategy) + */ export class BailErrorStrategy extends DefaultErrorStrategy { - constructor() { - super(); - } - /** * Instead of recovering from exception {@code e}, re-throw it wrapped * in a {@link ParseCancellationException} so it is not caught by the * rule function catches. Use {@link Exception//getCause()} to get the * original {@link RecognitionException}. */ - recover(recognizer, e) { - let context = recognizer._ctx; + public override recover(recognizer: Parser, e: RecognitionException): void { + let context: ParserRuleContext | null = recognizer.context; while (context !== null) { context.exception = e; - context = context.parent; + context = context.parent as ParserRuleContext; } throw new ParseCancellationException(e); } @@ -61,12 +62,19 @@ export class BailErrorStrategy extends DefaultErrorStrategy { * Make sure we don't attempt to recover inline; if the parser * successfully recovers, it won't throw an exception. */ - recoverInline(recognizer) { - this.recover(recognizer, new InputMismatchException(recognizer)); + public override recoverInline(recognizer: Parser): never { + const exception = new InputMismatchException(recognizer); + let context: ParserRuleContext | null = recognizer.context; + while (context !== null) { + context.exception = exception; + context = context.parent as ParserRuleContext; + } + + throw new ParseCancellationException(exception); } // Make sure we don't attempt to recover from problems in subrules.// - sync(recognizer) { + public override sync(_recognizer: Parser): void { // pass } } diff --git a/src/BaseErrorListener.d.ts b/src/BaseErrorListener.d.ts deleted file mode 100644 index accbc6e..0000000 --- a/src/BaseErrorListener.d.ts +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Recognizer } from "./Recognizer.js"; -import { Token } from "./Token.js"; -import { ATNConfigSet } from "./atn/ATNConfigSet.js"; -import { ATNSimulator } from "./atn/ATNSimulator.js"; -import { DFA } from "./dfa/DFA.js"; -import { RecognitionException } from "./RecognitionException.js"; - -export declare class BaseErrorListener- * This token stream ignores the value of {@link Token#getChannel}. If your - * parser requires the token stream filter tokens to only those on a particular - * channel, such as {@link Token#DEFAULT_CHANNEL} or - * {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a - * {@link CommonTokenStream}.
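The two-stage pattern described in the BailErrorStrategy comments above usually looks like the following sketch. `MyLexer`, `MyParser`, the `input` char stream, and the `startRule` entry point are hypothetical, and the `PredictionMode`, `reset`, and `seek` APIs are assumed to carry over from the other ANTLR runtimes.

```typescript
import {
    BailErrorStrategy, CommonTokenStream, DefaultErrorStrategy,
    ParseCancellationException, PredictionMode,
} from "antlr4ng";

const tokens = new CommonTokenStream(new MyLexer(input));
const parser = new MyParser(tokens);

// Stage 1: fast SLL prediction, bail out on the first syntax error.
parser.interpreter.predictionMode = PredictionMode.SLL;
parser.errorHandler = new BailErrorStrategy();
try {
    parser.startRule();
} catch (e) {
    if (e instanceof ParseCancellationException) {
        // Stage 2: rewind and reparse with full LL prediction and recovery.
        tokens.seek(0);
        parser.reset();
        parser.errorHandler = new DefaultErrorStrategy();
        parser.interpreter.predictionMode = PredictionMode.LL;
        parser.startRule();
    } else {
        throw e;
    }
}
```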
- */ -export declare class BufferedTokenStream implements TokenStream { - /** - * The {@link TokenSource} from which tokens for this stream are fetched. - */ - protected tokenSource: TokenSource; - - /** - * A collection of all tokens fetched from the token source. The list is - * considered a complete view of the input once {@link #fetchedEOF} is set - * to {@code true}. - */ - protected tokens: Token[]; - - /** - * Indicates whether the {@link Token#EOF} token has been fetched from - * {@link #tokenSource} and added to {@link #tokens}. This field improves - * performance for the following cases: - * - *- * If {@code i} specifies an index at or after the EOF token, the EOF token - * index is returned. This is due to the fact that the EOF token is treated - * as though it were on every channel.
- */ - protected previousTokenOnChannel(i: number, channel: number): number; - - protected filterForChannel(left: number, right: number, channel: number): number; - -} diff --git a/src/BufferedTokenStream.js b/src/BufferedTokenStream.ts similarity index 52% rename from src/BufferedTokenStream.js rename to src/BufferedTokenStream.ts index 17891f2..a821f66 100644 --- a/src/BufferedTokenStream.js +++ b/src/BufferedTokenStream.ts @@ -4,10 +4,15 @@ * can be found in the LICENSE.txt file in the project root. */ -import { Token } from './Token.js'; -import { Lexer } from './Lexer.js'; -import { Interval } from './misc/Interval.js'; +/* eslint-disable jsdoc/require-param, jsdoc/require-returns, @typescript-eslint/naming-convention */ +/* eslint-disable jsdoc/no-undefined-types */ + +import { Token } from "./Token.js"; +import { Lexer } from "./Lexer.js"; +import { Interval } from "./misc/Interval.js"; import { TokenStream } from "./TokenStream.js"; +import { TokenSource } from "./TokenSource.js"; +import { RuleContext } from "./RuleContext.js"; /** * This implementation of {@link TokenStream} loads tokens from a @@ -21,127 +26,129 @@ import { TokenStream } from "./TokenStream.js"; * {@link Token//HIDDEN_CHANNEL}, use a filtering token stream such a * {@link CommonTokenStream}. */ -export class BufferedTokenStream extends TokenStream { - constructor(tokenSource) { +export class BufferedTokenStream implements TokenStream { + /** + * The {@link TokenSource} from which tokens for this stream are fetched. + */ + protected tokenSource: TokenSource; - super(); - // The {@link TokenSource} from which tokens for this stream are fetched. - this.tokenSource = tokenSource; - /** - * A collection of all tokens fetched from the token source. The list is - * considered a complete view of the input once {@link //fetchedEOF} is set - * to {@code true}. - */ - this.tokens = []; + /** + * A collection of all tokens fetched from the token source. The list is + * considered a complete view of the input once {@link fetchedEOF} is set + * to `true`. + */ + protected tokens: Token[] = []; - /** - * The index into {@link //tokens} of the current token (next token to - * {@link //consume}). {@link //tokens}{@code [}{@link //p}{@code ]} should - * be - * {@link //LT LT(1)}. - * - *This field is set to -1 when the stream is first constructed or when - * {@link //setTokenSource} is called, indicating that the first token has - * not yet been fetched from the token source. For additional information, - * see the documentation of {@link IntStream} for a description of - * Initializing Methods.
- */ - this._index = -1; - - /** - * Indicates whether the {@link Token//EOF} token has been fetched from - * {@link //tokenSource} and added to {@link //tokens}. This field improves - * performance for the following cases: - * - *This field is set to -1 when the stream is first constructed or when + * {@link setTokenSource} is called, indicating that the first token has + * not yet been fetched from the token source. For additional information, + * see the documentation of {@link IntStream} for a description of + * Initializing Methods.
+ */ + protected p = -1; + + /** + * Indicates whether the {@link Token.EOF} token has been fetched from + * {@link tokenSource} and added to {@link tokens}. This field improves + * performance for the following cases: + * + *- * If {@code oldToken} is also a {@link CommonToken} instance, the newly - * constructed token will share a reference to the {@link //text} field and - * the {@link Pair} stored in {@link //source}. Otherwise, {@link //text} will - * be assigned the result of calling {@link //getText}, and {@link //source} - * will be constructed from the result of {@link Token//getTokenSource} and - * {@link Token//getInputStream}.
- * - * @param oldToken The token to copy. - */ - clone() { - const t = new CommonToken(this.source, this.type, this.channel, this.start, this.stop); - t.tokenIndex = this.tokenIndex; - t.line = this.line; - t.column = this.column; - t.text = this.text; - return t; - } - - cloneWithType(type) { - const t = new CommonToken(this.source, type, this.channel, this.start, this.stop); - t.tokenIndex = this.tokenIndex; - t.line = this.line; - t.column = this.column; - if (type === Token.EOF) - t.text = ""; - return t; - } - - toString(recognizer) { - let channelStr = ""; - if (this._channel > 0) { - channelStr = ",channel=" + this.channel; - } - - let text = this.text; - if (text) { - text = text.replace(/\n/g, "\\n"); - text = text.replace(/\r/g, "\\r"); - text = text.replace(/\t/g, "\\t"); - } else { - text = "+ * These properties share a field to reduce the memory footprint of + * {@link CommonToken}. Tokens created by a {@link CommonTokenFactory} from + * the same source and input stream share a reference to the same + * {@link Pair} containing these values.
+ */ + public source: [TokenSource | null, CharStream | null]; + + public tokenIndex = -1; + + public start = 0; + + public stop = 0; + + /** + * This is the backing field for {@link #getType} and {@link #setType}. + */ + public type = 0; + + /** + * This is the backing field for {@link #getLine} and {@link #setLine}. + */ + public line = 0; + + /** + * This is the backing field for {@link #getCharPositionInLine} and + * {@link #setCharPositionInLine}. + */ + public column = -1; // set to invalid position + + /** + * This is the backing field for {@link #getChannel} and + * {@link #setChannel}. + */ + public channel = Token.DEFAULT_CHANNEL; + + /** + * This is the backing field for {@link #getText} when the token text is + * explicitly set in the constructor or via {@link #setText}. + * + * @see #getText() + */ + #text: string | null = null; + + public constructor(source: [TokenSource | null, CharStream | null], type: number, channel: number, start: number, + stop: number) { + this.source = source; + this.type = type; + this.channel = channel ?? Token.DEFAULT_CHANNEL; + this.start = start; + this.stop = stop; + if (this.source[0] !== null) { + this.line = source[0]!.line; + // eslint-disable-next-line no-underscore-dangle + this.column = source[0]!._tokenStartColumn; + } else { + this.column = -1; + } + }; + + public get tokenSource(): TokenSource | null { + return this.source[0] ?? null; + } + + public get inputStream(): CharStream | null { + return this.source[1] ?? null; + } + + /** + * Constructs a new {@link CommonToken} as a copy of another {@link Token}. + * + *+ * If {@code oldToken} is also a {@link CommonToken} instance, the newly + * constructed token will share a reference to the {@link text} field and + * the {@link Pair} stored in {@link source}. Otherwise, {@link text} will + * be assigned the result of calling {@link getText}, and {@link source} + * will be constructed from the result of {@link Token//getTokenSource} and + * {@link Token//getInputStream}.
+ */ + public clone(): CommonToken { + const t = new CommonToken(this.source, this.type, this.channel, this.start, this.stop); + t.tokenIndex = this.tokenIndex; + t.line = this.line; + t.column = this.column; + t.#text = this.#text; + + return t; + } + + public cloneWithType(type: number): CommonToken { + const t = new CommonToken(this.source, type, this.channel, this.start, this.stop); + t.tokenIndex = this.tokenIndex; + t.line = this.line; + t.column = this.column; + if (type === Token.EOF) { + t.#text = ""; + } + + return t; + } + + public toString(recognizer?: Recognizer- * The default value is {@code false} to avoid the performance and memory - * overhead of copying text for every token unless explicitly requested.
- */ - this.copyText = copyText === undefined ? false : copyText; - } - - create(source, type, text, channel, start, stop, line, column) { - const t = new CommonToken(source, type, channel, start, stop); - t.line = line; - t.column = column; - if (text !== null) { - t.text = text; - } else if (this.copyText && source[1] !== null) { - t.text = source[1].getText(start, stop); - } - return t; - } - - createThin(type, text) { - const t = new CommonToken(null, type); - t.text = text; - return t; - } -} - -/** - * The default {@link CommonTokenFactory} instance. - * - *- * This token factory does not explicitly copy token text when constructing - * tokens.
- */ -CommonTokenFactory.DEFAULT = new CommonTokenFactory(); diff --git a/src/CommonTokenFactory.ts b/src/CommonTokenFactory.ts new file mode 100644 index 0000000..c0d747b --- /dev/null +++ b/src/CommonTokenFactory.ts @@ -0,0 +1,76 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { CharStream } from "./CharStream.js"; +import { CommonToken } from "./CommonToken.js"; +import { TokenFactory } from "./TokenFactory.js"; +import { TokenSource } from "./TokenSource.js"; + +/** + * This default implementation of {@link TokenFactory} creates + * {@link CommonToken} objects. + */ +export class CommonTokenFactory implements TokenFactory+ * This token factory does not explicitly copy token text when constructing + * tokens.
+ */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly DEFAULT = new CommonTokenFactory(); + + /** + * Indicates whether {@link CommonToken#setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream#getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token#getText} to be called at any time regardless of the + * input stream implementation. + * + *+ * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
+ */ + protected readonly copyText: boolean = false; + + public constructor(copyText?: boolean) { + /** + * Indicates whether {@link CommonToken//setText} should be called after + * constructing tokens to explicitly set the text. This is useful for cases + * where the input stream might not be able to provide arbitrary substrings + * of text from the input after the lexer creates a token (e.g. the + * implementation of {@link CharStream//getText} in + * {@link UnbufferedCharStream} throws an + * {@link UnsupportedOperationException}). Explicitly setting the token text + * allows {@link Token//getText} to be called at any time regardless of the + * input stream implementation. + * + *+ * The default value is {@code false} to avoid the performance and memory + * overhead of copying text for every token unless explicitly requested.
+ */ + this.copyText = copyText ?? false; + } + + public create(source: [TokenSource | null, CharStream | null], type: number, text: string | null, channel: number, + start: number, stop: number, line: number, column: number): CommonToken { + const t = new CommonToken(source, type, channel, start, stop); + t.line = line; + t.column = column; + if (text !== null) { + t.text = text; + } else if (this.copyText && source[1] !== null) { + t.text = source[1].getText(start, stop); + } + + return t; + } +} diff --git a/src/CommonTokenStream.d.ts b/src/CommonTokenStream.d.ts deleted file mode 100644 index db8077f..0000000 --- a/src/CommonTokenStream.d.ts +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Lexer } from "./Lexer.js"; -import { BufferedTokenStream } from "./BufferedTokenStream.js"; - -export declare class CommonTokenStream extends BufferedTokenStream { - public constructor(lexer: Lexer); - public constructor(lexer: Lexer, channel: number); -} diff --git a/src/CommonTokenStream.js b/src/CommonTokenStream.ts similarity index 69% rename from src/CommonTokenStream.js rename to src/CommonTokenStream.ts index d45875e..a485592 100644 --- a/src/CommonTokenStream.js +++ b/src/CommonTokenStream.ts @@ -4,8 +4,11 @@ * can be found in the LICENSE.txt file in the project root. */ -import { Token } from './Token.js'; -import { BufferedTokenStream } from './BufferedTokenStream.js'; +/* eslint-disable @typescript-eslint/naming-convention */ + +import { Token } from "./Token.js"; +import { BufferedTokenStream } from "./BufferedTokenStream.js"; +import { TokenSource } from "./TokenSource.js"; /** * This class extends {@link BufferedTokenStream} with functionality to filter @@ -14,9 +17,9 @@ import { BufferedTokenStream } from './BufferedTokenStream.js'; * ** This token stream provides access to all tokens by index or when calling - * methods like {@link //getText}. The channel filtering is only used for code - * accessing tokens via the lookahead methods {@link //LA}, {@link //LT}, and - * {@link //LB}.
+ * methods like {@link getText}. The channel filtering is only used for code + * accessing tokens via the lookahead methods {@link LA}, {@link LT}, and + * {@link LB}. * ** By default, tokens are placed on the default channel @@ -32,16 +35,25 @@ import { BufferedTokenStream } from './BufferedTokenStream.js'; * channel.
*/ export class CommonTokenStream extends BufferedTokenStream { - constructor(lexer, channel) { + /** + * Specifies the channel to use for filtering tokens. + * + *+ * The default value is {@link Token#DEFAULT_CHANNEL}, which matches the + * default channel assigned to tokens created by the lexer.
+ */ + protected channel = Token.DEFAULT_CHANNEL; + + public constructor(lexer: TokenSource, channel?: number) { super(lexer); - this.channel = channel === undefined ? Token.DEFAULT_CHANNEL : channel; + this.channel = channel ?? Token.DEFAULT_CHANNEL; } - adjustSeekIndex(i) { + public override adjustSeekIndex(i: number): number { return this.nextTokenOnChannel(i, this.channel); } - LB(k) { + public override LB(k: number): Token | null { if (k === 0 || this.index - k < 0) { return null; } @@ -56,10 +68,11 @@ export class CommonTokenStream extends BufferedTokenStream { if (i < 0) { return null; } + return this.tokens[i]; } - LT(k) { + public override LT(k: number): Token | null { this.lazyInit(); if (k === 0) { return null; @@ -77,15 +90,15 @@ export class CommonTokenStream extends BufferedTokenStream { } n += 1; } + return this.tokens[i]; } // Count EOF just once. - getNumberOfOnChannelTokens() { + public getNumberOfOnChannelTokens(): number { let n = 0; this.fill(); - for (let i = 0; i < this.tokens.length; i++) { - const t = this.tokens[i]; + for (const t of this.tokens) { if (t.channel === this.channel) { n += 1; } @@ -93,6 +106,7 @@ export class CommonTokenStream extends BufferedTokenStream { break; } } + return n; } } diff --git a/src/ConsoleErrorListener.js b/src/ConsoleErrorListener.js deleted file mode 100644 index 31224c0..0000000 --- a/src/ConsoleErrorListener.js +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { BaseErrorListener } from "./BaseErrorListener.js"; - -/** - * {@inheritDoc} - * - *- * This implementation prints messages to {@link System//err} containing the - * values of {@code line}, {@code charPositionInLine}, and {@code msg} using - * the following format.
- * - *- * line line:charPositionInLine msg - *- * - */ -export class ConsoleErrorListener extends BaseErrorListener { - constructor() { - super(); - } - - syntaxError(recognizer, offendingSymbol, line, column, msg, e) { - console.error("line " + line + ":" + column + " " + msg); - } -} - - -/** - * Provides a default instance of {@link ConsoleErrorListener}. - */ -ConsoleErrorListener.INSTANCE = new ConsoleErrorListener(); diff --git a/src/ConsoleErrorListener.ts b/src/ConsoleErrorListener.ts new file mode 100644 index 0000000..4b517fa --- /dev/null +++ b/src/ConsoleErrorListener.ts @@ -0,0 +1,40 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { BaseErrorListener } from "./BaseErrorListener.js"; +import { RecognitionException } from "./RecognitionException.js"; +import { Recognizer } from "./Recognizer.js"; +import { ATNSimulator } from "./atn/ATNSimulator.js"; + +/** + * {@inheritDoc} + * + *
+ * This implementation prints messages to {@link System//err} containing the + * values of {@code line}, {@code charPositionInLine}, and {@code msg} using + * the following format.
+ *
+ * <pre>
+ * line line:charPositionInLine msg
+ * </pre>
+ */
+export class ConsoleErrorListener extends BaseErrorListener {
+    /**
+     * Provides a default instance of {@link ConsoleErrorListener}.
+     */
+    // eslint-disable-next-line @typescript-eslint/naming-convention
+    public static readonly INSTANCE = new ConsoleErrorListener();
+
+    public override syntaxError<T extends ATNSimulator>(recognizer: Recognizer<T> | null, offendingSymbol: unknown,
+        line: number, charPositionInLine: number, msg: string, e: RecognitionException | null): void {
+        console.error("line " + line + ":" + charPositionInLine + " " + msg);
+    }
+}
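A typical setup then routes all errors through the shared instance (a hypothetical usage sketch, assuming the usual `removeErrorListeners`/`addErrorListener` members on the recognizer):

```typescript
parser.removeErrorListeners();
parser.addErrorListener(ConsoleErrorListener.INSTANCE);
```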
The default implementation simply calls {@link //endErrorCondition} to + * This field is used to propagate information about the lookahead following + * the previous match. Since prediction prefers completing the current rule + * to error recovery efforts, error reporting may occur later than the + * original point where it was discoverable. The original context is used to + * compute the true expected sets as though the reporting occurred as early + * as possible. + */ + protected nextTokensContext: ParserRuleContext | null = null; + + /** + * @see #nextTokensContext + */ + protected nextTokenState = 0; + + /** + *
The default implementation simply calls {@link endErrorCondition} to * ensure that the handler is not in error recovery mode.
*/ - reset(recognizer) { + public reset(recognizer: Parser): void { this.endErrorCondition(recognizer); } @@ -53,32 +72,31 @@ export class DefaultErrorStrategy { * This method is called to enter error recovery mode when a recognition * exception is reported. * - * @param recognizer the parser instance + * @param _recognizer the parser instance */ - beginErrorCondition(recognizer) { + public beginErrorCondition(_recognizer: Parser): void { this.errorRecoveryMode = true; } - inErrorRecoveryMode(recognizer) { + public inErrorRecoveryMode(_recognizer: Parser): boolean { return this.errorRecoveryMode; } /** * This method is called to leave error recovery mode after recovering from * a recognition exception. - * @param recognizer */ - endErrorCondition(recognizer) { + public endErrorCondition(_recognizer: Parser): void { this.errorRecoveryMode = false; - this.lastErrorStates = null; + this.lastErrorStates = new IntervalSet(); this.lastErrorIndex = -1; } /** * {@inheritDoc} - *The default implementation simply calls {@link //endErrorCondition}.
+ *The default implementation simply calls {@link endErrorCondition}.
*/ - reportMatch(recognizer) { + public reportMatch(recognizer: Parser): void { this.endErrorCondition(recognizer); } @@ -86,22 +104,22 @@ export class DefaultErrorStrategy { * {@inheritDoc} * *The default implementation returns immediately if the handler is already - * in error recovery mode. Otherwise, it calls {@link //beginErrorCondition} + * in error recovery mode. Otherwise, it calls {@link beginErrorCondition} * and dispatches the reporting task based on the runtime type of {@code e} * according to the following table.
* *This method is called when {@link //singleTokenDeletion} identifies + *
This method is called when {@link singleTokenDeletion} identifies * single-token deletion as a viable recovery strategy for a mismatched * input error.
* *The default implementation simply returns if the handler is already in - * error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to + * error recovery mode. Otherwise, it calls {@link beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser//notifyErrorListeners}.
* * @param recognizer the parser instance - * */ - reportUnwantedToken(recognizer) { + public reportUnwantedToken(recognizer: Parser): void { if (this.inErrorRecoveryMode(recognizer)) { return; } @@ -335,18 +351,18 @@ export class DefaultErrorStrategy { * method is called, the missing token has not yet been inserted. When this * method returns, {@code recognizer} is in error recovery mode. * - *This method is called when {@link //singleTokenInsertion} identifies + *
This method is called when {@link singleTokenInsertion} identifies * single-token insertion as a viable recovery strategy for a mismatched * input error.
* *The default implementation simply returns if the handler is already in - * error recovery mode. Otherwise, it calls {@link //beginErrorCondition} to + * error recovery mode. Otherwise, it calls {@link beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser//notifyErrorListeners}.
* * @param recognizer the parser instance */ - reportMissingToken(recognizer) { + public reportMissingToken(recognizer: Parser): void { if (this.inErrorRecoveryMode(recognizer)) { return; } @@ -372,8 +388,7 @@ export class DefaultErrorStrategy { * token and delete it. Then consume and return the next token (which was * the {@code LA(2)} token) as the successful result of the match operation. * - *This recovery strategy is implemented by {@link - * //singleTokenDeletion}.
+ *This recovery strategy is implemented by {@link singleTokenDeletion}.
* *MISSING TOKEN (single token insertion)
* @@ -383,8 +398,7 @@ export class DefaultErrorStrategy { * "insertion" is performed by returning the created token as the successful * result of the match operation. * - *This recovery strategy is implemented by {@link - * //singleTokenInsertion}.
+ *This recovery strategy is implemented by {@link singleTokenInsertion}.
* *EXAMPLE
* @@ -405,17 +419,18 @@ export class DefaultErrorStrategy { * * * The attempt to match {@code ')'} will fail when it sees {@code ';'} and - * call {@link //recoverInline}. To recover, it sees that {@code LA(1)==';'} + * call {@link recoverInline}. To recover, it sees that {@code LA(1)==';'} * is in the set of tokens that can follow the {@code ')'} token reference * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ - recoverInline(recognizer) { + public recoverInline(recognizer: Parser): Token { // SINGLE TOKEN DELETION const matchedSymbol = this.singleTokenDeletion(recognizer); if (matchedSymbol !== null) { // we have deleted the extra token. // now, move past ttype token as if all were ok recognizer.consume(); + return matchedSymbol; } // SINGLE TOKEN INSERTION @@ -428,7 +443,7 @@ export class DefaultErrorStrategy { /** * This method implements the single-token insertion inline error recovery - * strategy. It is called by {@link //recoverInline} if the single-token + * strategy. It is called by {@link recoverInline} if the single-token * deletion strategy fails to recover from the mismatched input. If this * method returns {@code true}, {@code recognizer} will be in error recovery * mode. @@ -440,20 +455,21 @@ export class DefaultErrorStrategy { * token with the correct type to produce this behavior. * * @param recognizer the parser instance - * @return {@code true} if single-token insertion is a viable recovery + * @returns `true` if single-token insertion is a viable recovery * strategy for the current mismatched input, otherwise {@code false} */ - singleTokenInsertion(recognizer) { + public singleTokenInsertion(recognizer: Parser): boolean { const currentSymbolType = recognizer.tokenStream.LA(1); // if current token is consistent with what could come after current // ATN state, then we know we're missing a token; error recovery // is free to conjure up and insert the missing token - const atn = recognizer.interpreter.atn; + const atn = recognizer.atn; const currentState = atn.states[recognizer.state]; - const next = currentState.transitions[0].target; - const expectingAtLL2 = atn.nextTokens(next, recognizer._ctx); + const next = currentState!.transitions[0].target; + const expectingAtLL2 = atn.nextTokens(next, recognizer.context); if (expectingAtLL2.contains(currentSymbolType)) { this.reportMissingToken(recognizer); + return true; } else { return false; @@ -462,24 +478,24 @@ export class DefaultErrorStrategy { /** * This method implements the single-token deletion inline error recovery - * strategy. It is called by {@link //recoverInline} to attempt to recover + * strategy. It is called by {@link recoverInline} to attempt to recover * from mismatched input. If this method returns null, the parser and error * handler state will not have changed. If this method returns non-null, * {@code recognizer} will not be in error recovery mode since the * returned token was a successful match. * *If the single-token deletion is successful, this method calls - * {@link //reportUnwantedToken} to report the error, followed by + * {@link reportUnwantedToken} to report the error, followed by * {@link Parser//consume} to actually "delete" the extraneous token. Then, - * before returning {@link //reportMatch} is called to signal a successful + * before returning {@link reportMatch} is called to signal a successful * match.
* * @param recognizer the parser instance - * @return the successfully matched {@link Token} instance if single-token + * @returns the successfully matched {@link Token} instance if single-token * deletion successfully recovers from the mismatched input, otherwise * {@code null} */ - singleTokenDeletion(recognizer) { + public singleTokenDeletion(recognizer: Parser): Token | null { const nextTokenType = recognizer.tokenStream.LA(2); const expecting = this.getExpectedTokens(recognizer); if (expecting.contains(nextTokenType)) { @@ -492,6 +508,7 @@ export class DefaultErrorStrategy { // we want to return the token we're actually matching const matchedSymbol = recognizer.getCurrentToken(); this.reportMatch(recognizer); // we know current token is correct + return matchedSymbol; } else { return null; @@ -519,8 +536,8 @@ export class DefaultErrorStrategy { * override this method to create the appropriate tokens. * */ - getMissingSymbol(recognizer) { - const currentSymbol = recognizer.getCurrentToken(); + public getMissingSymbol(recognizer: Parser): Token { + const currentSymbol = recognizer.getCurrentToken() as CommonToken; const expecting = this.getExpectedTokens(recognizer); let expectedTokenType = Token.INVALID_TYPE; if (!expecting.isNil) { @@ -535,7 +552,7 @@ export class DefaultErrorStrategy { } let current = currentSymbol; - const lookBack = recognizer.tokenStream.LT(-1); + const lookBack = recognizer.tokenStream.LT(-1) as CommonToken; if (current.type === Token.EOF && lookBack !== null) { current = lookBack; } @@ -545,7 +562,7 @@ export class DefaultErrorStrategy { -1, -1, current.line, current.column); } - getExpectedTokens(recognizer) { + public getExpectedTokens(recognizer: Parser): IntervalSet { return recognizer.getExpectedTokens(); } @@ -558,7 +575,7 @@ export class DefaultErrorStrategy { * your token objects because you don't have to go modify your lexer * so that it creates a new Java type. */ - getTokenErrorDisplay(t) { + public getTokenErrorDisplay(t: Token | null): string { if (t === null) { return "* {@link ParserRuleContext} does not include field storage for the rule index * since the context classes created by the code generator override the * {@link #getRuleIndex} method to return the correct value for that context. * Since the parser interpreter does not use the context classes generated for a * parser, this class (with slightly more memory overhead per node) is used to - * provide equivalent functionality. + * provide equivalent functionality.
*/ export class InterpreterRuleContext extends ParserRuleContext { - public constructor(ruleIndex: number); - /** - * Constructs a new {@link InterpreterRuleContext} with the specified - * parent, invoking state, and rule index. - * - * @param ruleIndex The rule index for the current context. - * @param parent The parent context. - * @param invokingStateNumber The invoking state number. - */ - public constructor(ruleIndex: number, parent: ParserRuleContext | undefined, invokingStateNumber: number); + /** This is the backing field for {@link #getRuleIndex}. */ + #ruleIndex: number; - public get ruleIndex(): number; + public constructor(ruleIndex: number, parent: ParserRuleContext | null, invokingStateNumber?: number) { + if (invokingStateNumber !== undefined) { + super(parent, invokingStateNumber); + } else { + super(); + } + + this.#ruleIndex = ruleIndex; + } + + public override get ruleIndex(): number { + return this.#ruleIndex; + } } diff --git a/src/Lexer.d.ts b/src/Lexer.d.ts deleted file mode 100644 index 09477e7..0000000 --- a/src/Lexer.d.ts +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Recognizer } from "./Recognizer.js"; -import { LexerATNSimulator } from "./atn/LexerATNSimulator.js"; -import { CharStream } from "./CharStream.js"; -import { Token } from "./Token.js"; -import { TokenSource } from "./TokenSource.js"; -import { TokenFactory } from "./TokenFactory.js"; -import { InputStream } from "./InputStream.js"; - -export declare abstract class Lexer extends RecognizerIf the symbol type does not match, * {@link ANTLRErrorStrategy//recoverInline} is called on the current error - * strategy to attempt recovery. If {@link //buildParseTree} is + * strategy to attempt recovery. If {@link buildParseTree} is * {@code true} and the token index of the symbol returned by * {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to * the parse tree by calling {@link ParserRuleContext//addErrorNode}.
* * @param ttype the token type to match - * @return the matched symbol + * @returns the matched symbol * @throws RecognitionException if the current input symbol did not match * {@code ttype} and the error strategy could not recover from the * mismatched symbol */ - match(ttype) { + public match(ttype: number): Token { let t = this.getCurrentToken(); if (t.type === ttype) { this.errorHandler.reportMatch(this); @@ -111,30 +162,31 @@ export class Parser extends Recognizer { // we must have conjured up a new token during single token // insertion // if it's not the current symbol - this._ctx.addErrorNode(t); + this.context!.addErrorNode(this.createErrorNode(this.context!, t)); } } + return t; } /** * Match current input symbol as a wildcard. If the symbol type matches * (i.e. has a value greater than 0), {@link ANTLRErrorStrategy//reportMatch} - * and {@link //consume} are called to complete the match process. + * and {@link consume} are called to complete the match process. * *If the symbol type does not match, * {@link ANTLRErrorStrategy//recoverInline} is called on the current error - * strategy to attempt recovery. If {@link //buildParseTree} is + * strategy to attempt recovery. If {@link buildParseTree} is * {@code true} and the token index of the symbol returned by * {@link ANTLRErrorStrategy//recoverInline} is -1, the symbol is added to * the parse tree by calling {@link ParserRuleContext//addErrorNode}.
* - * @return the matched symbol + * @returns the matched symbol * @throws RecognitionException if the current input symbol did not match * a wildcard and the error strategy could not recover from the mismatched * symbol */ - matchWildcard() { + public matchWildcard(): Token { let t = this.getCurrentToken(); if (t.type > 0) { this.errorHandler.reportMatch(this); @@ -145,14 +197,15 @@ export class Parser extends Recognizer { // we must have conjured up a new token during single token // insertion // if it's not the current symbol - this._ctx.addErrorNode(t); + this.context!.addErrorNode(this.createErrorNode(this.context!, t)); } } + return t; } - getParseListeners() { - return this._parseListeners || []; + public getParseListeners(): ParseTreeListener[] { + return this._parseListeners ?? []; } /** @@ -184,9 +237,9 @@ export class Parser extends Recognizer { * * @throws NullPointerException if {@code} listener is {@code null} */ - addParseListener(listener) { + public addParseListener(listener: ParseTreeListener): void { if (listener === null) { - throw "listener"; + throw new Error("listener"); } if (this._parseListeners === null) { this._parseListeners = []; @@ -199,10 +252,11 @@ export class Parser extends Recognizer { * *If {@code listener} is {@code null} or has not been added as a parse * listener, this method does nothing.
+ * * @param listener the listener to remove */ - removeParseListener(listener) { - if (this._parseListeners !== null) { + public removeParseListener(listener: ParseTreeListener | null): void { + if (this._parseListeners !== null && listener !== null) { const idx = this._parseListeners.indexOf(listener); if (idx >= 0) { this._parseListeners.splice(idx, 1); @@ -214,15 +268,15 @@ export class Parser extends Recognizer { } // Remove all parse listeners. - removeParseListeners() { + public removeParseListeners(): void { this._parseListeners = null; } // Notify any parse listeners of an enter rule event. - triggerEnterRuleEvent() { + public triggerEnterRuleEvent(): void { if (this._parseListeners !== null) { - const ctx = this._ctx; - this._parseListeners.forEach(function (listener) { + const ctx = this.context!; + this._parseListeners.forEach((listener) => { listener.enterEveryRule(ctx); ctx.enterRule(listener); }); @@ -231,26 +285,27 @@ export class Parser extends Recognizer { /** * Notify any parse listeners of an exit rule event. + * * @see //addParseListener */ - triggerExitRuleEvent() { + public triggerExitRuleEvent(): void { if (this._parseListeners !== null) { // reverse order walk of listeners - const ctx = this._ctx; - this._parseListeners.slice(0).reverse().forEach(function (listener) { + const ctx = this.context!; + this._parseListeners.slice(0).reverse().forEach((listener) => { ctx.exitRule(listener); listener.exitEveryRule(ctx); }); } } - getTokenFactory() { - return this._input.tokenSource._factory; + public getTokenFactory(): TokenFactoryE.g., given the following input with {@code A} being the current
* lookahead symbol, this function moves the cursor to {@code B} and returns
@@ -346,82 +401,83 @@ export class Parser extends Recognizer {
* {@link ParseTreeListener//visitErrorNode} is called on any parse
* listeners.
*/
- consume() {
+ public consume(): Token {
const o = this.getCurrentToken();
if (o.type !== Token.EOF) {
this.tokenStream.consume();
}
const hasListener = this._parseListeners !== null && this._parseListeners.length > 0;
if (this.buildParseTrees || hasListener) {
- let node;
+ let node: ErrorNode | TerminalNode;
if (this.errorHandler.inErrorRecoveryMode(this)) {
- node = this._ctx.addErrorNode(o);
+ node = this.context!.addErrorNode(this.createErrorNode(this.context!, o));
} else {
- node = this._ctx.addTokenNode(o);
+ node = this.context!.addTokenNode(o);
}
- node.invokingState = this.state;
+ //node.invokingState = this.state;
if (hasListener) {
- this._parseListeners.forEach(function (listener) {
- if (node instanceof ErrorNode || (node.isErrorNode !== undefined && node.isErrorNode())) {
+ this._parseListeners!.forEach((listener) => {
+ if (node instanceof ErrorNode) {
listener.visitErrorNode(node);
- } else if (node instanceof TerminalNode) {
+ } else {
listener.visitTerminal(node);
}
});
}
}
+
return o;
}
- addContextToParseTree() {
+ public addContextToParseTree(): void {
// add current context to parent if we have a parent
- if (this._ctx.parent !== null) {
- this._ctx.parent.addChild(this._ctx);
+ if (this.context?.parent !== null) {
+ this.context!.parent.addChild(this.context!);
}
}
/**
* Always called by generated parsers upon entry to a rule. Access field
- * {@link //_ctx} get the current context.
+ * {@link context} to get the current context.
*/
- enterRule(localctx, state, ruleIndex) {
+ public enterRule(localctx: ParserRuleContext, state: number, _ruleIndex: number): void {
this.state = state;
- this._ctx = localctx;
- this._ctx.start = this._input.LT(1);
+ this.context = localctx;
+ this.context.start = this._input!.LT(1);
if (this.buildParseTrees) {
this.addContextToParseTree();
}
this.triggerEnterRuleEvent();
}
- exitRule() {
- this._ctx.stop = this._input.LT(-1);
+ public exitRule(): void {
+ this.context!.stop = this._input!.LT(-1);
// trigger event on _ctx, before it reverts to parent
this.triggerExitRuleEvent();
- this.state = this._ctx.invokingState;
- this._ctx = this._ctx.parent;
+ this.state = this.context!.invokingState;
+ this.context = this.context!.parent;
}
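
Since `enterRule`/`exitRule` maintain the parent/child chaining of `ParserRuleContext` objects, the effect can be seen with the plain context API alone. A minimal sketch, using only the constructor and members declared elsewhere in this diff:

```ts
import { ParserRuleContext } from "antlr4ng";

// enterRule() links a fresh context into the tree; exitRule() restores
// this.state from invokingState and pops back to the parent context.
const root = new ParserRuleContext(null, -1);
const child = new ParserRuleContext(root, 42); // 42 = invoking state number

root.addChild(child);

console.log(child.parent === root); // true
console.log(child.invokingState);   // 42
```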
- enterOuterAlt(localctx, altNum) {
+ public enterOuterAlt(localctx: ParserRuleContext, altNum: number): void {
localctx.setAltNumber(altNum);
// if we have new localctx, make sure we replace existing ctx
// that is previous child of parse tree
- if (this.buildParseTrees && this._ctx !== localctx) {
- if (this._ctx.parent !== null) {
- this._ctx.parent.removeLastChild();
- this._ctx.parent.addChild(localctx);
+ if (this.buildParseTrees && this.context !== localctx) {
+ if (this.context!.parent !== null) {
+ this.context!.parent.removeLastChild();
+ this.context!.parent.addChild(localctx);
}
}
- this._ctx = localctx;
+ this.context = localctx;
}
/**
* Get the precedence level for the top-most precedence rule.
*
- * @return The precedence level for the top-most precedence rule, or -1 if
+ * @returns The precedence level for the top-most precedence rule, or -1 if
* the parser context is not nested within a precedence rule.
*/
- getPrecedence() {
+ public getPrecedence(): number {
if (this._precedenceStack.length === 0) {
return -1;
} else {
@@ -429,67 +485,68 @@ export class Parser extends Recognizer {
}
}
- enterRecursionRule(localctx, state, ruleIndex, precedence) {
+ public enterRecursionRule(localctx: ParserRuleContext, state: number, ruleIndex: number, precedence: number): void {
this.state = state;
this._precedenceStack.push(precedence);
- this._ctx = localctx;
- this._ctx.start = this._input.LT(1);
+ this.context = localctx;
+ this.context.start = this._input!.LT(1);
this.triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules
}
- // Like {@link //enterRule} but for recursive rules.
- pushNewRecursionContext(localctx, state, ruleIndex) {
- const previous = this._ctx;
- previous._parent = localctx;
+ // Like {@link enterRule} but for recursive rules.
+ public pushNewRecursionContext(localctx: ParserRuleContext, state: number, _ruleIndex: number): void {
+ const previous = this.context!;
+ previous.parent = localctx;
previous.invokingState = state;
- previous.stop = this._input.LT(-1);
+ previous.stop = this._input!.LT(-1);
- this._ctx = localctx;
- this._ctx.start = previous.start;
+ this.context = localctx;
+ this.context.start = previous.start;
if (this.buildParseTrees) {
- this._ctx.addChild(previous);
+ this.context.addChild(previous);
}
this.triggerEnterRuleEvent(); // simulates rule entry for left-recursive rules
}
- unrollRecursionContexts(parent) {
+ public unrollRecursionContexts(parent: ParserRuleContext | null): void {
this._precedenceStack.pop();
- this._ctx.stop = this._input.LT(-1);
- const retCtx = this._ctx; // save current ctx (return value)
+ this.context!.stop = this._input!.LT(-1);
+ const retCtx = this.context!; // save current ctx (return value)
// unroll so _ctx is as it was before call to recursive method
const parseListeners = this.getParseListeners();
if (parseListeners !== null && parseListeners.length > 0) {
- while (this._ctx !== parent) {
+ while (this.context !== parent) {
this.triggerExitRuleEvent();
- this._ctx = this._ctx.parent;
+ this.context = this.context!.parent;
}
} else {
- this._ctx = parent;
+ this.context = parent;
}
// hook into tree
- retCtx._parent = parent;
+ retCtx.parent = parent;
if (this.buildParseTrees && parent !== null) {
// add return ctx into invoking rule's tree
parent.addChild(retCtx);
}
}
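
For orientation, here is roughly the call sequence these three methods produce for a left-recursive rule. This is a commented trace, not runnable API usage, and the rule, state, and context names are made up:

```ts
// Grammar (hypothetical):  e : e '+' e | ID ;
// Parsing "a + b" drives the parser roughly like this:
//
//   enterRecursionRule(e1, state, RULE_e, 0);   // push precedence, e1 becomes context
//   match(ID);                                  // 'a' (primary alternative)
//   pushNewRecursionContext(e2, state, RULE_e); // e1 re-parented under the new e2
//   match(PLUS); match(ID);                     // '+' 'b' (right operand, simplified)
//   unrollRecursionContexts(parent);            // pop precedence, hook e2 into parent
//
// The result is a tree (e2 (e1 a) + b) even though the rule recursed on the left.
```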
- getInvokingContext(ruleIndex) {
- let ctx = this._ctx;
+ public getInvokingContext(ruleIndex: number): ParserRuleContext | null {
+ let ctx = this.context;
while (ctx !== null) {
if (ctx.ruleIndex === ruleIndex) {
return ctx;
}
ctx = ctx.parent;
}
+
return null;
}
- precpred(localctx, precedence) {
+ public override precpred(_localctx: ParserRuleContext | null, precedence: number): boolean {
return precedence >= this._precedenceStack[this._precedenceStack.length - 1];
}
- inContext(context) {
+ public inContext(_context: string): boolean {
// TODO: useful in parser?
return false;
}
@@ -505,13 +562,13 @@ export class Parser extends Recognizer {
*
*
* @param symbol the symbol type to check
- * @return {@code true} if {@code symbol} can follow the current state in
+ * @returns `true` if {@code symbol} can follow the current state in
* the ATN, otherwise {@code false}.
*/
- isExpectedToken(symbol) {
+ public isExpectedToken(symbol: number): boolean {
const atn = this.interpreter.atn;
- let ctx = this._ctx;
- const s = atn.states[this.state];
+ let ctx = this.context;
+ const s = atn.states[this.state]!;
let following = atn.nextTokens(s);
if (following.contains(symbol)) {
return true;
@@ -520,8 +577,8 @@ export class Parser extends Recognizer {
return false;
}
while (ctx !== null && ctx.invokingState >= 0 && following.contains(Token.EPSILON)) {
- const invokingState = atn.states[ctx.invokingState];
- const rt = invokingState.transitions[0];
+ const invokingState = atn.states[ctx.invokingState]!;
+ const rt = invokingState.transitions[0] as RuleTransition;
following = atn.nextTokens(rt.followState);
if (following.contains(symbol)) {
return true;
@@ -537,25 +594,26 @@ export class Parser extends Recognizer {
/**
* Computes the set of input symbols which could follow the current parser
- * state and context, as given by {@link //getState} and {@link //getContext},
+ * state and context, as given by {@link getState} and {@link getContext},
* respectively.
*
- * @see ATN//getExpectedTokens(int, RuleContext)
+ * @see ATN.getExpectedTokens(int, RuleContext)
*/
- getExpectedTokens() {
- return this.interpreter.atn.getExpectedTokens(this.state, this._ctx);
+ public getExpectedTokens(): IntervalSet {
+ return this.interpreter.atn.getExpectedTokens(this.state, this.context!);
}
- getExpectedTokensWithinCurrentRule() {
+ public getExpectedTokensWithinCurrentRule(): IntervalSet {
const atn = this.interpreter.atn;
- const s = atn.states[this.state];
+ const s = atn.states[this.state]!;
+
return atn.nextTokens(s);
}
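
The two expected-token queries above are what error strategies use to build messages. A hedged usage sketch (plain antlr4ng API, no generated classes needed; the `toString()` rendering of the interval sets is assumed to be the default one):

```ts
import { Parser } from "antlr4ng";

// Hypothetical reporting helper: both sets are computed from the parser's
// current ATN state; the first follows rule invocations (EPSILON edges),
// the second stays within the current rule.
function describeExpectations(parser: Parser): string {
    const following = parser.getExpectedTokens();
    const inRule = parser.getExpectedTokensWithinCurrentRule();

    return `expecting ${following.toString()} (in rule: ${inRule.toString()})`;
}
```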
// Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.
- getRuleIndex(ruleName) {
+ public getRuleIndex(ruleName: string): number {
const ruleIndex = this.getRuleIndexMap().get(ruleName);
- if (ruleIndex !== null) {
+ if (ruleIndex != null) {
return ruleIndex;
} else {
return -1;
@@ -570,10 +628,10 @@ export class Parser extends Recognizer {
*
* this is very useful for error messages.
*/
- getRuleInvocationStack(p) {
- p = p || null;
+ public getRuleInvocationStack(p?: RuleContext | null): string[] {
+ p = p ?? null;
if (p === null) {
- p = this._ctx;
+ p = this.context;
}
const stack = [];
while (p !== null) {
@@ -586,39 +644,48 @@ export class Parser extends Recognizer {
}
p = p.parent;
}
+
return stack;
}
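
A small usage sketch for the invocation stack (the names come from `ruleNames`, innermost rule first), for example inside a custom error listener:

```ts
import { Parser } from "antlr4ng";

// Hypothetical helper: on a syntax error, show where in the grammar the
// parser currently is, similar to what the Java runtime reports.
function ruleStackMessage(parser: Parser): string {
    const stack = parser.getRuleInvocationStack();

    return "rule stack: [" + stack.join(", ") + "]";
}
```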
- // For debugging and other purposes.
- getDFAStrings() {
+ /**
+ * For debugging and other purposes.
+ *
+ * TODO: this differs from the Java version. Change it.
+ */
+ public getDFAStrings(): string {
return this.interpreter.decisionToDFA.toString();
}
- // For debugging and other purposes.
- dumpDFA() {
+ /** For debugging and other purposes. */
+ public dumpDFA(): void {
let seenOne = false;
- for (let i = 0; i < this.interpreter.decisionToDFA.length; i++) {
- const dfa = this.interpreter.decisionToDFA[i];
+ for (const dfa of this.interpreter.decisionToDFA) {
if (dfa.states.length > 0) {
if (seenOne) {
console.log();
}
- console.log("Decision " + dfa.decision + ":");
- console.log(dfa.toString(this.vocabulary));
+
+ // During tests this field is assigned. Avoids accessing Node.js stuff outside of the tests.
+ if (this.printer) {
+ this.printer.println("Decision " + dfa.decision + ":");
+ this.printer.print(dfa.toString(this.vocabulary));
+ }
+
seenOne = true;
}
}
}
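
The new `printer` indirection means `dumpDFA()` no longer writes to the console directly. A sketch of wiring it up; the two-method shape of the printer object is inferred from the calls in this hunk, not from a documented interface:

```ts
import { Parser } from "antlr4ng";

// Hypothetical setup: route DFA dumps to the console, the way the
// project's tests assign this field.
function dumpDecisions(parser: Parser): void {
    parser.printer = {
        println: (text: string) => { console.log(text); },
        print: (text: string) => { console.log(text); },
    };
    parser.dumpDFA();
}
```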
- getSourceName() {
- return this._input.sourceName;
+ public getSourceName(): string {
+ return this._input!.getSourceName();
}
/**
* During a parse is sometimes useful to listen in on the rule entry and exit
* events as well as token matches. this is for quick and dirty debugging.
*/
- setTrace(trace) {
+ public setTrace(trace: boolean): void {
if (!trace) {
this.removeParseListener(this._tracer);
this._tracer = null;
@@ -631,19 +698,11 @@ export class Parser extends Recognizer {
}
}
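
Typical use of the tracing switch, as a hedged sketch:

```ts
import { Parser } from "antlr4ng";

declare const parser: Parser; // e.g. an instance of a generated parser

parser.setTrace(true);  // installs the internal trace listener
// ... invoke a start rule; rule entries/exits and matches are printed ...
parser.setTrace(false); // removes it again
```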
- createTerminalNode(parent, t) {
- return new TerminalNodeImpl(t);
+ public createTerminalNode(parent: ParserRuleContext, t: Token): TerminalNode {
+ return new TerminalNode(t);
}
- createErrorNode(parent, t) {
- return new ErrorNodeImpl(t);
+ public createErrorNode(parent: ParserRuleContext, t: Token): ErrorNode {
+ return new ErrorNode(t);
}
}
-
-/**
- * this field maps from the serialized ATN string to the deserialized {@link ATN} with
- * bypass alternatives.
- *
- * @see ATNDeserializationOptions//isGenerateRuleBypassTransitions()
- */
-Parser.bypassAltsAtnCache = {};
diff --git a/src/ParserInterpreter.d.ts b/src/ParserInterpreter.d.ts
deleted file mode 100644
index 83696a2..0000000
--- a/src/ParserInterpreter.d.ts
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * Copyright (c) The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-
-import { ATN } from "./atn/ATN.js";
-import { ATNState } from "./atn/ATNState.js";
-import { BitSet } from "./misc/BitSet.js";
-import { DecisionState } from "./atn/DecisionState.js";
-import { InterpreterRuleContext } from "./InterpreterRuleContext.js";
-import { Parser } from "./Parser.js";
-import { RecognitionException } from "./RecognitionException.js";
-import { Token } from "./Token.js";
-import { TokenStream } from "./TokenStream.js";
-import { Vocabulary } from "./Vocabulary.js";
-import { ParserRuleContext } from "./ParserRuleContext.js";
-import { RuleContext } from "./atn/RuleContext.js";
-
-/**
- * A parser simulator that mimics what ANTLR's generated
- * parser code does. A ParserATNSimulator is used to make
- * predictions via adaptivePredict but this class moves a pointer through the
- * ATN to simulate parsing. ParserATNSimulator just
- * makes us efficient rather than having to backtrack, for example.
- *
- * This properly creates parse trees even for left recursive rules.
- *
- * We rely on the left recursive rule invocation and special predicate
- * transitions to make left recursive rules work.
- *
- * See TestParserInterpreter for examples.
- */
-export abstract class ParserInterpreter extends Parser {
- /**
- * This identifies StarLoopEntryState's that begin the (...)*
- * precedence loops of left recursive rules.
- */
- protected pushRecursionContextStates: BitSet;
-
- /**
- * This stack corresponds to the _parentctx, _parentState pair of locals
- * that would exist on call stack frames with a recursive descent parser;
- * in the generated function for a left-recursive rule you'd see:
- *
- * private EContext e(int _p) {
- * ParserRuleContext _parentctx = _ctx; // Pair.a
- * int _parentState = state; // Pair.b
- * ...
- * }
- *
- * Those values are used to create new recursive rule invocation contexts
- * associated with left operand of an alt like "expr '*' expr".
- */
- protected readonly _parentContextStack: Array<[ParserRuleContext, number]>;
-
- /**
- * We need a map from (decision,inputIndex)->forced alt for computing ambiguous
- * parse trees. For now, we allow exactly one override.
- */
- protected overrideDecision: number;
- protected overrideDecisionInputIndex: number;
- protected overrideDecisionAlt: number;
- protected overrideDecisionReached: boolean; // latch and only override once; error might trigger infinite loop
-
- /**
- * What is the current context when we override a decisions? This tells
- * us what the root of the parse tree is when using override
- * for an ambiguity/lookahead check.
- */
- protected _overrideDecisionRoot?: InterpreterRuleContext;
-
- protected _rootContext: InterpreterRuleContext;
-
- /**
- * A copy constructor that creates a new parser interpreter by reusing
- * the fields of a previous interpreter.
- *
- * @param old The interpreter to copy
- */
- public constructor(old: ParserInterpreter);
- public constructor(grammarFileName: string, vocabulary: Vocabulary, ruleNames: string[], atn: ATN,
- input: TokenStream);
- public constructor(grammarFileName: ParserInterpreter | string, vocabulary?: Vocabulary,
- ruleNames?: string[], atn?: ATN, input?: TokenStream);
-
- public reset(resetInput?: boolean): void;
-
- public override get atn(): ATN;
- public override get vocabulary(): Vocabulary;
- public override get ruleNames(): string[];
- public override get grammarFileName(): string;
-
- /**
- * Begin parsing at startRuleIndex
- *
- * @param startRuleIndex the grammar rule to start parsing from
- *
- * @returns the parse tree for the entire input
- */
- public parse(startRuleIndex: number): ParserRuleContext;
-
- public enterRecursionRule(localctx: ParserRuleContext, state: number, ruleIndex: number, precedence: number): void;
-
- /**
- * Override this parser interpreters normal decision-making process
- * at a particular decision and input token index. Instead of
- * allowing the adaptive prediction mechanism to choose the
- * first alternative within a block that leads to a successful parse,
- * force it to take the alternative, 1..n for n alternatives.
- *
- * As an implementation limitation right now, you can only specify one
- * override. This is sufficient to allow construction of different
- * parse trees for ambiguous input. It means re-parsing the entire input
- * in general because you're never sure where an ambiguous sequence would
- * live in the various parse trees. For example, in one interpretation,
- * an ambiguous input sequence would be matched completely in expression
- * but in another it could match all the way back to the root.
- *
- * s : e '!'? ;
- * e : ID
- * | ID '!'
- * ;
- *
- * Here, x! can be matched as (s (e ID) !) or (s (e ID !)). In the first
- * case, the ambiguous sequence is fully contained only by the root.
- * In the second case, the ambiguous sequences fully contained within just
- * e, as in: (e ID !).
- *
- * Rather than trying to optimize this and make
- * some intelligent decisions for optimization purposes, I settled on
- * just re-parsing the whole input and then using
- * {link Trees#getRootOfSubtreeEnclosingRegion} to find the minimal
- * subtree that contains the ambiguous sequence. I originally tried to
- * record the call stack at the point the parser detected and ambiguity but
- * left recursive rules create a parse tree stack that does not reflect
- * the actual call stack. That impedance mismatch was enough to make
- * it it challenging to restart the parser at a deeply nested rule
- * invocation.
- *
- * Only parser interpreters can override decisions so as to avoid inserting
- * override checking code in the critical ALL(*) prediction execution path.
- *
- * @param decision
- * @param tokenIndex
- * @param forcedAlt
- */
- public addDecisionOverride(decision: number, tokenIndex: number, forcedAlt: number): void;
-
- protected get atnState(): ATNState;
-
- protected visitState(p: ATNState): void;
-
- /**
- * Method visitDecisionState() is called when the interpreter reaches
- * a decision state (instance of DecisionState). It gives an opportunity
- * for subclasses to track interesting things.
- *
- * @param p : the decision state
- *
- * @returns The prediction made by the interpreter for this decision state.
- */
- protected visitDecisionState(p: DecisionState): number;
-
- /**
- * Provide simple "factory" for InterpreterRuleContext's.
- *
- * @param parent
- * @param invokingStateNumber
- * @param ruleIndex
- */
- protected createInterpreterRuleContext(parent: ParserRuleContext | undefined, invokingStateNumber: number,
- ruleIndex: number): InterpreterRuleContext;
-
- protected visitRuleStopState(p: ATNState): void;
-
- /**
- * Rely on the error handler for this parser but, if no tokens are consumed
- * to recover, add an error node. Otherwise, nothing is seen in the parse
- * tree.
- *
- * @param e
- */
- protected recover(e: RecognitionException): void;
-
- protected recoverInline(): Token;
-
- /**
- * Return the root of the parse, which can be useful if the parser
- * bails out. You still can access the top node. Note that,
- * because of the way left recursive rules add children, it's possible
- * that the root will not have any children if the start rule immediately
- * called and left recursive rule that fails.
- *
- * @since 4.5.1
- */
- public get rootContext(): InterpreterRuleContext;
-
- public abstract action(localctx: RuleContext | null, ruleIndex: number, actionIndex: number): void;
-}
diff --git a/src/ParserInterpreter.js b/src/ParserInterpreter.ts
similarity index 54%
rename from src/ParserInterpreter.js
rename to src/ParserInterpreter.ts
index 0e684a6..558ef72 100644
--- a/src/ParserInterpreter.js
+++ b/src/ParserInterpreter.ts
@@ -4,6 +4,8 @@
* can be found in the LICENSE.txt file in the project root.
*/
+/* eslint-disable no-underscore-dangle */
+
import { ATNState } from "./atn/ATNState.js";
import { BitSet } from "./misc/BitSet.js";
import { FailedPredicateException } from "./FailedPredicateException.js";
@@ -19,29 +21,42 @@ import { ATNStateType } from "./atn/ATNStateType.js";
import { TransitionType } from "./atn/TransitionType.js";
import { DFA } from "./dfa/DFA.js";
import { PredictionContextCache } from "./atn/PredictionContextCache.js";
+import { ATN } from "./atn/ATN.js";
+import { Vocabulary } from "./Vocabulary.js";
+import { TokenStream } from "./TokenStream.js";
+import { ParserRuleContext } from "./ParserRuleContext.js";
+import { RuleStartState } from "./atn/RuleStartState.js";
+import { RuleTransition } from "./atn/RuleTransition.js";
+import { PredicateTransition } from "./atn/PredicateTransition.js";
+import { ActionTransition } from "./atn/ActionTransition.js";
+import { PrecedencePredicateTransition } from "./atn/PrecedencePredicateTransition.js";
+import { DecisionState } from "./atn/DecisionState.js";
+import { TokenSource } from "./TokenSource.js";
+import { CharStream } from "./CharStream.js";
export class ParserInterpreter extends Parser {
- #grammarFileName;
- #atn;
- #ruleNames;
- #vocabulary;
- #decisionToDFA;
- #sharedContextCache = new PredictionContextCache();
+ protected _rootContext: InterpreterRuleContext;
- #pushRecursionContextStates;
+ protected _parentContextStack: Array<[ParserRuleContext | null, number]> = [];
- _rootContext;
+ protected overrideDecision = -1;
+ protected overrideDecisionInputIndex = -1;
+ protected overrideDecisionAlt = -1;
+ protected overrideDecisionReached = false;
- _parentContextStack = [];
+ protected _overrideDecisionRoot: InterpreterRuleContext | null = null;
- overrideDecision = -1;
- overrideDecisionInputIndex = -1;
- overrideDecisionAlt = -1;
- overrideDecisionReached = false;
+ #grammarFileName: string;
+ #atn: ATN;
+ #ruleNames: string[];
+ #vocabulary: Vocabulary;
+ #decisionToDFA: DFA[];
+ #sharedContextCache = new PredictionContextCache();
- _overrideDecisionRoot = undefined;
+ #pushRecursionContextStates;
- constructor(grammarFileName, vocabulary, ruleNames, atn, input) {
+ public constructor(grammarFileName: string, vocabulary: Vocabulary, ruleNames: string[], atn: ATN,
+ input: TokenStream) {
super(input);
this.#grammarFileName = grammarFileName;
this.#atn = atn;
@@ -50,13 +65,13 @@ export class ParserInterpreter extends Parser {
// Cache the ATN states where pushNewRecursionContext() must be called in `visitState()`.
this.#pushRecursionContextStates = new BitSet();
- for (let state of atn.states) {
+ for (const state of atn.states) {
if (state instanceof StarLoopEntryState && state.precedenceRuleDecision) {
this.#pushRecursionContextStates.set(state.stateNumber);
}
}
- this.#decisionToDFA = atn.decisionToState.map(function (ds, i) {
+ this.#decisionToDFA = atn.decisionToState.map((ds, i) => {
return new DFA(ds, i);
});
@@ -64,37 +79,37 @@ export class ParserInterpreter extends Parser {
this.interpreter = new ParserATNSimulator(this, atn, this.#decisionToDFA, this.#sharedContextCache);
}
- reset(resetInput) {
- super.reset(resetInput);
+ public override reset(): void {
+ super.reset();
this.overrideDecisionReached = false;
- this._overrideDecisionRoot = undefined;
+ this._overrideDecisionRoot = null;
}
- get atn() {
+ public override get atn(): ATN {
return this.#atn;
}
- get vocabulary() {
+ public get vocabulary(): Vocabulary {
return this.#vocabulary;
}
- get ruleNames() {
+ public get ruleNames(): string[] {
return this.#ruleNames;
}
- get grammarFileName() {
+ public get grammarFileName(): string {
return this.#grammarFileName;
}
- get atnState() {
- return this.#atn.states[this.state];
+ public get atnState(): ATNState {
+ return this.#atn.states[this.state]!;
}
- parse(startRuleIndex) {
- let startRuleStartState = this.#atn.ruleToStartState[startRuleIndex];
+ public parse(startRuleIndex: number): ParserRuleContext {
+ const startRuleStartState = this.#atn.ruleToStartState[startRuleIndex]!;
- this._rootContext = this.createInterpreterRuleContext(undefined, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
+ this._rootContext = this.createInterpreterRuleContext(null, ATNState.INVALID_STATE_NUMBER, startRuleIndex);
if (startRuleStartState.isPrecedenceRule) {
this.enterRecursionRule(this._rootContext, startRuleStartState.stateNumber, startRuleIndex, 0);
}
@@ -103,19 +118,21 @@ export class ParserInterpreter extends Parser {
}
while (true) {
- let p = this.atnState;
+ const p = this.atnState;
switch (p.stateType) {
case ATNStateType.RULE_STOP:
// pop; return from rule
- if (this._ctx.isEmpty) {
+ if (this.context?.isEmpty) {
if (startRuleStartState.isPrecedenceRule) {
- let result = this._ctx;
- let parentContext = this._parentContextStack.pop();
+ const result = this.context;
+ const parentContext = this._parentContextStack.pop()!;
this.unrollRecursionContexts(parentContext[0]);
+
return result;
}
else {
this.exitRule();
+
return this._rootContext;
}
}
@@ -126,11 +143,10 @@ export class ParserInterpreter extends Parser {
default:
try {
this.visitState(p);
- }
- catch (e) {
+ } catch (e) {
if (e instanceof RecognitionException) {
- this.state = this.#atn.ruleToStopState[p.ruleIndex].stateNumber;
- this.context.exception = e;
+ this.state = this.#atn.ruleToStopState[p.ruleIndex]!.stateNumber;
+ this.context!.exception = e;
this.errorHandler.reportError(this, e);
this.recover(e);
} else {
@@ -143,41 +159,55 @@ export class ParserInterpreter extends Parser {
}
}
- enterRecursionRule(localctx, state, ruleIndex, precedence) {
- this._parentContextStack.push([this._ctx, localctx.invokingState]);
+ public addDecisionOverride(decision: number, tokenIndex: number, forcedAlt: number): void {
+ this.overrideDecision = decision;
+ this.overrideDecisionInputIndex = tokenIndex;
+ this.overrideDecisionAlt = forcedAlt;
+ }
+
+ public get overrideDecisionRoot(): InterpreterRuleContext | null {
+ return this._overrideDecisionRoot;
+ }
+
+ public get rootContext(): InterpreterRuleContext {
+ return this._rootContext;
+ }
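
A hedged sketch of the override API defined above. The decision number, token index, and forced alternative would normally come from an ambiguity report; `interpreter` is assumed to be an already constructed `ParserInterpreter`:

```ts
import { ParserInterpreter } from "antlr4ng";

declare const interpreter: ParserInterpreter;

// Force decision 3 to take alternative 2 when prediction happens at token
// index 10, then re-parse from rule 0 and check whether the override hit.
interpreter.addDecisionOverride(3, 10, 2);
const tree = interpreter.parse(0);
console.log(tree.getChildCount(), interpreter.overrideDecisionRoot !== null);
```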
+
+ public override enterRecursionRule(localctx: ParserRuleContext, state: number, ruleIndex: number,
+ precedence: number): void {
+ this._parentContextStack.push([this.context, localctx.invokingState]);
super.enterRecursionRule(localctx, state, ruleIndex, precedence);
}
- visitState(p) {
+ protected visitState(p: ATNState): void {
let predictedAlt = 1;
- if (p.transitions.length > 1) {
+ if (p instanceof DecisionState) {
predictedAlt = this.visitDecisionState(p);
}
- let transition = p.transitions[predictedAlt - 1];
+ const transition = p.transitions[predictedAlt - 1];
switch (transition.serializationType) {
case TransitionType.EPSILON:
if (this.#pushRecursionContextStates.get(p.stateNumber) &&
!(transition.target instanceof LoopEndState)) {
// We are at the start of a left recursive rule's (...)* loop
// and we're not taking the exit branch of loop.
- let parentContext = this._parentContextStack[this._parentContextStack.length - 1];
- let localctx =
- this.createInterpreterRuleContext(parentContext[0], parentContext[1], this._ctx.ruleIndex);
- this.pushNewRecursionContext(localctx,
- this.#atn.ruleToStartState[p.ruleIndex].stateNumber,
- this._ctx.ruleIndex);
+ const parentContext = this._parentContextStack[this._parentContextStack.length - 1];
+ const localctx =
+ this.createInterpreterRuleContext(parentContext[0], parentContext[1], this.context!.ruleIndex);
+ this.pushNewRecursionContext(localctx, this.#atn.ruleToStartState[p.ruleIndex]!.stateNumber,
+ this.context!.ruleIndex);
}
break;
case TransitionType.ATOM:
- this.match(transition.label.minElement);
+ this.match(transition.label!.minElement);
break;
case TransitionType.RANGE:
case TransitionType.SET:
case TransitionType.NOT_SET:
- if (!transition.matches(this._input.LA(1), Token.MIN_USER_TOKEN_TYPE, 65535)) {
+ if (!transition.matches(this._input!.LA(1), Token.MIN_USER_TOKEN_TYPE, 65535)) {
this.recoverInline();
}
this.matchWildcard();
@@ -188,11 +218,12 @@ export class ParserInterpreter extends Parser {
break;
case TransitionType.RULE:
- let ruleStartState = transition.target;
- let ruleIndex = ruleStartState.ruleIndex;
- let newContext = this.createInterpreterRuleContext(this._ctx, p.stateNumber, ruleIndex);
+ const ruleStartState = transition.target as RuleStartState;
+ const ruleIndex = ruleStartState.ruleIndex;
+ const newContext = this.createInterpreterRuleContext(this.context, p.stateNumber, ruleIndex);
if (ruleStartState.isPrecedenceRule) {
- this.enterRecursionRule(newContext, ruleStartState.stateNumber, ruleIndex, (transition).precedence);
+ this.enterRecursionRule(newContext, ruleStartState.stateNumber, ruleIndex,
+ (transition as RuleTransition).precedence);
}
else {
this.enterRule(newContext, transition.target.stateNumber, ruleIndex);
@@ -200,21 +231,21 @@ export class ParserInterpreter extends Parser {
break;
case TransitionType.PREDICATE:
- let predicateTransition = transition;
- if (!this.sempred(this._ctx, predicateTransition.ruleIndex, predicateTransition.predIndex)) {
+ const predicateTransition = transition as PredicateTransition;
+ if (!this.sempred(this.context, predicateTransition.ruleIndex, predicateTransition.predIndex)) {
throw new FailedPredicateException(this);
}
break;
case TransitionType.ACTION:
- let actionTransition = transition;
- this.action(this._ctx, actionTransition.ruleIndex, actionTransition.actionIndex);
+ const actionTransition = transition as ActionTransition;
+ this.action(this.context, actionTransition.ruleIndex, actionTransition.actionIndex);
break;
case TransitionType.PRECEDENCE:
- if (!this.precpred(this._ctx, transition.precedence)) {
- let precedence = transition.precedence;
+ if (!this.precpred(this.context, (transition as PrecedencePredicateTransition).precedence)) {
+ const precedence = (transition as PrecedencePredicateTransition).precedence;
throw new FailedPredicateException(this, `precpred(_ctx, ${precedence})`);
}
break;
@@ -226,68 +257,59 @@ export class ParserInterpreter extends Parser {
this.state = transition.target.stateNumber;
}
- visitDecisionState(p) {
+ protected visitDecisionState(p: DecisionState): number {
let predictedAlt = 1;
if (p.transitions.length > 1) {
this.errorHandler.sync(this);
- let decision = p.decision;
- if (decision === this.overrideDecision && this._input.index === this.overrideDecisionInputIndex &&
+ const decision = p.decision;
+ if (decision === this.overrideDecision && this._input!.index === this.overrideDecisionInputIndex &&
!this.overrideDecisionReached) {
predictedAlt = this.overrideDecisionAlt;
this.overrideDecisionReached = true;
} else {
- predictedAlt = this.interpreter.adaptivePredict(this._input, decision, this._ctx);
+ predictedAlt = this.interpreter.adaptivePredict(this._input!, decision, this.context);
}
}
return predictedAlt;
}
- createInterpreterRuleContext(parent, invokingStateNumber, ruleIndex) {
+ protected createInterpreterRuleContext(parent: ParserRuleContext | null, invokingStateNumber: number,
+ ruleIndex: number): InterpreterRuleContext {
return new InterpreterRuleContext(ruleIndex, parent, invokingStateNumber);
}
- visitRuleStopState(p) {
- let ruleStartState = this.#atn.ruleToStartState[p.ruleIndex];
+ protected visitRuleStopState(p: ATNState): void {
+ const ruleStartState = this.#atn.ruleToStartState[p.ruleIndex]!;
if (ruleStartState.isPrecedenceRule) {
- let parentContext = this._parentContextStack.pop();
- this.unrollRecursionContexts(parentContext[0]);
- this.state = parentContext[1];
+ const [parentContext, state] = this._parentContextStack.pop()!;
+ this.unrollRecursionContexts(parentContext);
+ this.state = state;
} else {
this.exitRule();
}
- let ruleTransition = this.#atn.states[this.state].transitions[0];
+ const ruleTransition = this.#atn.states[this.state]!.transitions[0] as RuleTransition;
this.state = ruleTransition.followState.stateNumber;
}
- addDecisionOverride(decision, tokenIndex, forcedAlt) {
- this.overrideDecision = decision;
- this.overrideDecisionInputIndex = tokenIndex;
- this.overrideDecisionAlt = forcedAlt;
- }
-
- get overrideDecisionRoot() {
- return this._overrideDecisionRoot;
- }
-
- recover(e) {
- let i = this._input.index;
+ protected recover(e: RecognitionException): void {
+ const i = this._input!.index;
this.errorHandler.recover(this, e);
- if (this._input.index === i) {
+ if (this._input!.index === i) {
// no input consumed, better add an error node
- let tok = e.offendingToken;
+ const tok = e.offendingToken;
if (!tok) {
throw new Error("Expected exception to have an offending token");
}
- const source = tok.getTokenSource();
+ const source = tok.tokenSource;
const stream = source?.inputStream ?? null;
- const sourcePair = [source, stream];
+ const sourcePair: [TokenSource | null, CharStream | null] = [source, stream];
if (e instanceof InputMismatchException) {
- let expectedTokens = e.getExpectedTokens();
+ const expectedTokens = e.getExpectedTokens();
if (!expectedTokens) {
throw new Error("Expected the exception to provide expected tokens");
}
@@ -298,30 +320,18 @@ export class ParserInterpreter extends Parser {
expectedTokenType = expectedTokens.minElement;
}
- let errToken =
- this.getTokenFactory().create(sourcePair,
- expectedTokenType, tok.text,
- Token.DEFAULT_CHANNEL,
- -1, -1, // invalid start/stop
- tok.line, tok.charPositionInLine);
- this._ctx.addErrorNode(this.createErrorNode(this._ctx, errToken));
+ const errToken = this.getTokenFactory().create(sourcePair, expectedTokenType, tok.text,
+ Token.DEFAULT_CHANNEL, -1, -1, tok.line, tok.column);
+ this.context!.addErrorNode(this.createErrorNode(this.context!, errToken));
} else { // NoViableAlt
- let errToken =
- this.getTokenFactory().create(sourcePair,
- Token.INVALID_TYPE, tok.text,
- Token.DEFAULT_CHANNEL,
- -1, -1, // invalid start/stop
- tok.line, tok.charPositionInLine);
- this._ctx.addErrorNode(this.createErrorNode(this._ctx, errToken));
+ const errToken = this.getTokenFactory().create(sourcePair, Token.INVALID_TYPE, tok.text,
+ Token.DEFAULT_CHANNEL, -1, -1, tok.line, tok.column);
+ this.context!.addErrorNode(this.createErrorNode(this.context!, errToken));
}
}
}
- recoverInline() {
+ protected recoverInline(): Token {
return this.errorHandler.recoverInline(this);
}
-
- get rootContext() {
- return this._rootContext;
- }
}
diff --git a/src/ParserRuleContext.d.ts b/src/ParserRuleContext.d.ts
deleted file mode 100644
index dfdc5b8..0000000
--- a/src/ParserRuleContext.d.ts
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-
-import { Token } from "./Token.js";
-import { RecognitionException } from "./RecognitionException.js";
-import { ErrorNode } from "./tree/ErrorNode.js";
-import { TerminalNode } from "./tree/TerminalNode.js";
-import { RuleContext } from "./atn/RuleContext.js";
-
-export declare class ParserRuleContext extends RuleContext {
- // eslint-disable-next-line @typescript-eslint/naming-convention
- public static readonly EMPTY: ParserRuleContext;
-
- public start: Token | null;
- public stop: Token | null;
- public exception?: RecognitionException;
-
- public constructor(parent: ParserRuleContext | null, invokingStateNumber: number);
-
- public get parent(): ParserRuleContext | null;
-
- public copyFrom(ctx: ParserRuleContext): void;
- public addChild(child: RuleContext): void;
- public removeLastChild(): void;
- public addTokenNode(token: Token): TerminalNode;
- public addErrorNode(badToken: Token): ErrorNode;
- public getChildCount(): number;
- public getChild<T extends ParseTree>(i: number): T | null;
- * If the set of expected tokens is not known and could not be computed,
- * this method returns {@code null}.
+ * If the set of expected tokens is not known and could not be computed,
+ * this method returns {@code null}.
- * If the state number is not known, this method returns -1.
+ * If the state number is not known, this method returns -1.
- * Used for XPath and tree pattern compilation.
+ * Used for XPath and tree pattern compilation.
+ * The non-negative numbers less than {@link #MIN_USER_CHANNEL_VALUE} are
+ * assigned to the predefined channels {@link #DEFAULT_CHANNEL} and
+ * {@link #HIDDEN_CHANNEL}.
+ * Errors from the lexer are never passed to the parser. Either you want to keep
+ * going or you do not upon token recognition error. If you do not want to
+ * continue lexing then you do not want to continue parsing. Just throw an
+ * exception not under {@link RecognitionException} and Java will naturally toss
+ * you all the way out of the recognizers. If you want to continue lexing then
+ * you should not throw an exception to the parser--it has already requested a
+ * token. Keep lexing until you get a valid one. Just report errors and keep
+ * going, looking for a valid token.
+ * If {@code ctx.getSourceInterval()} does not return a valid interval of
+ * tokens provided by this stream, the behavior is unspecified.
+ * If the specified {@code start} or {@code stop} token was not provided by
+ * this stream, or if the {@code stop} occurred before the {@code start}
+ * token, the behavior is unspecified.
+ * For streams which ensure that the {@link Token#getTokenIndex} method is
+ * accurate for all of its provided tokens, this method behaves like the
+ * following code. Other streams may implement this method in other ways
+ * provided the behavior is consistent with this at a high level.
+ * You can insert stuff, replace, and delete chunks. Note that the operations
+ * are done lazily--only if you convert the buffer to a {@link String} with
+ * {@link TokenStream#getText()}. This is very efficient because you are not
+ * moving data around all the time. As the buffer of tokens is converted to
+ * strings, the {@link #getText()} method(s) scan the input token stream and
+ * check to see if there is an operation at the current index. If so, the
+ * operation is done and then normal {@link String} rendering continues on the
+ * buffer. This is like having multiple Turing machine instruction streams
+ * (programs) operating on a single input tape. :)
+ * This rewriter makes no modifications to the token stream. It does not ask the
+ * stream to fill itself up nor does it advance the input cursor. The token
+ * stream {@link TokenStream#index()} will return the same value before and
+ * after any {@link #getText()} call.
+ * The rewriter only works on tokens that you have in the buffer and ignores the
+ * current input cursor. If you are buffering tokens on-demand, calling
+ * {@link #getText()} halfway through the input will only do rewrites for those
+ * tokens in the first half of the file.
+ * Since the operations are done lazily at {@link #getText}-time, operations do
+ * not screw up the token index values. That is, an insert operation at token
+ * index {@code i} does not change the index values for tokens
+ * {@code i}+1..n-1.
+ * Because operations never actually alter the buffer, you may always get the
+ * original token stream back without undoing anything. Since the instructions
+ * are queued up, you can easily simulate transactions and roll back any changes
+ * if there is an error just by removing instructions. For example,
+ * Then in the rules, you can execute (assuming rewriter is visible):
+ * You can also have multiple "instruction streams" and get multiple rewrites
+ * from a single pass over the input. Just name the instruction streams and use
+ * that name again when printing the buffer. This could be useful for generating
+ * a C file and also its header file--all from the same buffer:
+ * If you don't use named rewrite streams, a "default" stream is used as the
+ * first example shows.
- * No literal or symbol names are assigned to token types, so
- * {@link #getDisplayName(int)} returns the numeric value for all tokens
- * except {@link Token#EOF}.
- * The resulting vocabulary instance returns {@code null} for
- * {@link #getLiteralName(int)} and {@link #getSymbolicName(int)}, and the
- * value from {@code tokenNames} for the display names.
+ * No literal or symbol names are assigned to token types, so
+ * {@link #getDisplayName(int)} returns the numeric value for all tokens
+ * except {@link Token#EOF}.
+ * The resulting vocabulary instance returns {@code null} for
+ * {@link #getLiteralName(int)} and {@link #getSymbolicName(int)}, and the
+ * value from {@code tokenNames} for the display names.
- * If {@code context} is {@code null}, it is treated as
- * {@link ParserRuleContext//EMPTY}.
+ * If {@code context} is {@code null}, it is treated as
+ * {@link ParserRuleContext//EMPTY}.
+ * TokenStream stream = ...;
+ * String text = stream.getText(ctx.getSourceInterval());
+ *
+ *
+ * @param ctx The context providing the source interval of tokens to get
+ * text for.
+ * @returns The text of all tokens within the source interval of {@code ctx}.
+ */
+ getText(ctx: RuleContext): string;
+
+ /**
+ * Return the text of all tokens in this stream between {@code start} and
+ * {@code stop} (inclusive).
+ *
+ *
+ * TokenStream stream = ...;
+ * String text = "";
+ * for (int i = start.getTokenIndex(); i <= stop.getTokenIndex(); i++) {
+ * text += stream.get(i).getText();
+ * }
+ *
+ *
+ * @param start The first token in the interval to get text for.
+ * @param stop The last token in the interval to get text for (inclusive).
+ * @returns The text of all tokens lying between the specified {@code start}
+ * and {@code stop} tokens.
+ *
+ * @throws UnsupportedOperationException if this stream does not support
+ * this method for the specified tokens
+ */
+ getText(start: Token | null, stop: Token | null): string;
}
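
A short sketch using both `getText()` overloads declared above; for a well-formed context both calls cover the same token interval:

```ts
import { ParserRuleContext, TokenStream } from "antlr4ng";

// Hypothetical helper: fetch the original text behind a parse tree node.
function textOf(stream: TokenStream, ctx: ParserRuleContext): string {
    const byContext = stream.getText(ctx);                // via ctx.getSourceInterval()
    const byTokens = stream.getText(ctx.start, ctx.stop); // via the boundary tokens
    console.assert(byContext === byTokens);

    return byContext;
}
```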
diff --git a/src/TokenStreamRewriter.d.ts b/src/TokenStreamRewriter.d.ts
deleted file mode 100644
index 6f83540..0000000
--- a/src/TokenStreamRewriter.d.ts
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
- */
-
-/* eslint-disable max-classes-per-file */
-
-import { CommonTokenStream } from "./CommonTokenStream.js";
-import { Token } from "./Token.js";
-import { Interval } from "./misc/Interval.js";
-
-type Rewrites = Array<RewriteOperation | undefined>;
+ * CharStream input = new ANTLRFileStream("input");
+ * TLexer lex = new TLexer(input);
+ * CommonTokenStream tokens = new CommonTokenStream(lex);
+ * T parser = new T(tokens);
+ * TokenStreamRewriter rewriter = new TokenStreamRewriter(tokens);
+ * parser.startRule();
+ *
+ *
+ *
+ * Token t,u;
+ * ...
+ * rewriter.insertAfter(t, "text to put after t");}
+ * rewriter.insertAfter(u, "text after u");}
+ * System.out.println(rewriter.getText());
+ *
+ *
+ *
+ * rewriter.insertAfter("pass1", t, "text to put after t");}
+ * rewriter.insertAfter("pass2", u, "text after u");}
+ * System.out.println(rewriter.getText("pass1"));
+ * System.out.println(rewriter.getText("pass2"));
+ *
+ *
+ *
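
The Java examples above translate almost one-to-one to this runtime. A hedged TypeScript sketch (token acquisition is elided; `insertAfter` and `getText` are the rewriter methods present in this diff's sources):

```ts
import { CommonTokenStream, Token, TokenStreamRewriter } from "antlr4ng";

declare const tokens: CommonTokenStream; // produced by a lexer elsewhere
declare const t: Token, u: Token;        // tokens captured during a parse

const rewriter = new TokenStreamRewriter(tokens);

// Queue lazy instructions against the default instruction stream.
rewriter.insertAfter(t, "text to put after t");
rewriter.insertAfter(u, "text after u");

// Rendering applies the queued instructions; the token buffer itself and
// the stream's index are left untouched.
console.log(rewriter.getText());
```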
This method updates {@link //dipsIntoOuterContext} and - * {@link //hasSemanticContext} when necessary.
- */ - add(config, mergeCache) { - if (mergeCache === undefined) { - mergeCache = null; - } - if (this.readOnly) { - throw "This set is readonly"; - } - if (config.semanticContext !== SemanticContext.NONE) { - this.hasSemanticContext = true; - } - if (config.reachesIntoOuterContext > 0) { - this.dipsIntoOuterContext = true; - } - const existing = this.configLookup.add(config); - if (existing === config) { - this.cachedHashCode = -1; - this.configs.push(config); // track order here - return true; - } - // a previous (s,i,pi,_), merge with it and save result - const rootIsWildcard = !this.fullCtx; - const merged = merge(existing.context, config.context, rootIsWildcard, mergeCache); - /** - * no need to check for existing.context, config.context in cache - * since only way to create new graphs is "call rule" and here. We - * cache at both places - */ - existing.reachesIntoOuterContext = Math.max(existing.reachesIntoOuterContext, config.reachesIntoOuterContext); - // make sure to preserve the precedence filter suppression during the merge - if (config.precedenceFilterSuppressed) { - existing.precedenceFilterSuppressed = true; - } - existing.context = merged; // replace context; no need to alt mapping - return true; - } - - getStates() { - const states = new HashSet(); - for (let i = 0; i < this.configs.length; i++) { - states.add(this.configs[i].state); - } - return states; - } - - getPredicates() { - const preds = []; - for (let i = 0; i < this.configs.length; i++) { - const c = this.configs[i].semanticContext; - if (c !== SemanticContext.NONE) { - preds.push(c.semanticContext); - } - } - return preds; - } - - optimizeConfigs(interpreter) { - if (this.readOnly) { - throw "This set is readonly"; - } - if (this.configLookup.length === 0) { - return; - } - for (let i = 0; i < this.configs.length; i++) { - const config = this.configs[i]; - config.context = interpreter.getCachedContext(config.context); - } - } - - addAll(coll) { - for (let i = 0; i < coll.length; i++) { - this.add(coll[i]); - } - return false; - } - - equals(other) { - return this === other || - (other instanceof ATNConfigSet && - equalArrays(this.configs, other.configs) && - this.fullCtx === other.fullCtx && - this.uniqueAlt === other.uniqueAlt && - this.conflictingAlts === other.conflictingAlts && - this.hasSemanticContext === other.hasSemanticContext && - this.dipsIntoOuterContext === other.dipsIntoOuterContext); - } - - hashCode() { - const hash = new HashCode(); - hash.update(this.configs); - return hash.finish(); - } - - updateHashCode(hash) { - if (this.readOnly) { - if (this.cachedHashCode === -1) { - this.cachedHashCode = this.hashCode(); - } - hash.update(this.cachedHashCode); - } else { - hash.update(this.hashCode()); - } - } - - isEmpty() { - return this.configs.length === 0; - } - - contains(item) { - if (this.configLookup === null) { - throw "This method is not implemented for readonly sets."; - } - return this.configLookup.contains(item); - } - - containsFast(item) { - if (this.configLookup === null) { - throw "This method is not implemented for readonly sets."; - } - return this.configLookup.containsFast(item); - } - - clear() { - if (this.readOnly) { - throw "This set is readonly"; - } - this.configs = []; - this.cachedHashCode = -1; - this.configLookup = new HashSet(); - } - - setReadonly(readOnly) { - this.readOnly = readOnly; - if (readOnly) { - this.configLookup = null; // can't mod, no need for lookup cache - } - } - - toString() { - return arrayToString(this.configs) + - (this.hasSemanticContext ? 
",hasSemanticContext=" + this.hasSemanticContext : "") + - (this.uniqueAlt !== ATN.INVALID_ALT_NUMBER ? ",uniqueAlt=" + this.uniqueAlt : "") + - (this.conflictingAlts !== null ? ",conflictingAlts=" + this.conflictingAlts : "") + - (this.dipsIntoOuterContext ? ",dipsIntoOuterContext" : ""); - } - - get items() { - return this.configs; - } - - get length() { - return this.configs.length; - } -} diff --git a/src/atn/ATNConfigSet.ts b/src/atn/ATNConfigSet.ts new file mode 100644 index 0000000..a9b2d5c --- /dev/null +++ b/src/atn/ATNConfigSet.ts @@ -0,0 +1,264 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/* eslint-disable jsdoc/require-returns, jsdoc/require-param */ + +import { ATN } from "./ATN.js"; +import { SemanticContext } from "./SemanticContext.js"; +import { merge } from "./PredictionContextUtils.js"; +import { HashSet } from "../misc/HashSet.js"; +import { HashCode } from "../misc/HashCode.js"; + +import { equalArrays, arrayToString } from "../utils/helpers.js"; +import { ATNConfig } from "./ATNConfig.js"; +import { BitSet } from "../misc/BitSet.js"; +import { DoubleDict } from "../utils/DoubleDict.js"; +import { PredictionContext } from "./PredictionContext.js"; +import { ATNState } from "./ATNState.js"; +import { ATNSimulator } from "./ATNSimulator.js"; + +const hashATNConfig = (c: ATNConfig) => { + return c.hashCodeForConfigSet(); +}; + +const equalATNConfigs = (a: ATNConfig, b: ATNConfig): boolean => { + if (a === b) { + return true; + } else if (a === null || b === null) { + return false; + } else { return a.equalsForConfigSet(b); } +}; + +/** + * Specialized {@link Set}{@code <}{@link ATNConfig}{@code >} that can track + * info about the set, with support for combining similar configurations using a + * graph-structured stack + */ +export class ATNConfigSet { + // Track the elements as they are added to the set; supports get(i)/// + public configs: ATNConfig[] = []; + + /** + * Used in parser and lexer. In lexer, it indicates we hit a pred + * while computing a closure operation. Don't make a DFA state from this + */ + public hasSemanticContext = false; + public dipsIntoOuterContext = false; + + /** + * Indicates that this configuration set is part of a full context + * LL prediction. It will be used to determine how to merge $. With SLL + * it's a wildcard whereas it is not for LL context merge + */ + public readonly fullCtx: boolean; + + public uniqueAlt = 0; + + /** + * The reason that we need this is because we don't want the hash map to use + * the standard hash code and equals. We need all configurations with the + * same + * {@code (s,i,_,semctx)} to be equal. Unfortunately, this key effectively + * doubles + * the number of objects associated with ATNConfigs. The other solution is + * to + * use a hash table that lets us specify the equals/hashCode operation. + * All configs but hashed by (s, i, _, pi) not including context. Wiped out + * when we go readonly as this set becomes a DFA state + */ + public configLookup = new HashSetThis method updates {@link dipsIntoOuterContext} and + * {@link hasSemanticContext} when necessary.
+ */ + public add(config: ATNConfig, + mergeCache?: DoubleDictThis cache makes a huge difference in memory and a little bit in speed. - * For the Java grammar on java.*, it dropped the memory requirements - * at the end from 25M to 16M. We don't store any of the full context - * graphs in the DFA because they are limited to local context only, - * but apparently there's a lot of repetition there as well. We optimize - * the config contexts before storing the config set in the DFA states - * by literally rebuilding them with cached subgraphs only.
- * - *I tried a cache for use during closure operations, that was - * whacked after each adaptivePredict(). It cost a little bit - * more time I think and doesn't save on the overall footprint - * so it's not worth the complexity.
- */ - this.atn = atn; - this.sharedContextCache = sharedContextCache; - return this; - } - - getCachedContext(context) { - if (this.sharedContextCache === null) { - return context; - } - const visited = new HashMap(); - return getCachedPredictionContext(context, this.sharedContextCache, visited); - } -} - -// Must distinguish between missing edge and edge we know leads nowhere/// -ATNSimulator.ERROR = new DFAState(0x7FFFFFFF, new ATNConfigSet()); diff --git a/src/atn/ATNSimulator.ts b/src/atn/ATNSimulator.ts new file mode 100644 index 0000000..34a68f8 --- /dev/null +++ b/src/atn/ATNSimulator.ts @@ -0,0 +1,61 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { DFAState } from "../dfa/DFAState.js"; +import { getCachedPredictionContext } from "./PredictionContextUtils.js"; +import { HashMap } from "../misc/HashMap.js"; +import { ATN } from "./ATN.js"; +import { PredictionContextCache } from "./PredictionContextCache.js"; +import { PredictionContext } from "./PredictionContext.js"; + +export class ATNSimulator { + /** Must distinguish between missing edge and edge we know leads nowhere */ + + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly ERROR = new DFAState(0x7FFFFFFF); + + public readonly atn: ATN; + + /** + * The context cache maps all PredictionContext objects that are == + * to a single cached copy. This cache is shared across all contexts + * in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet + * to use only cached nodes/graphs in addDFAState(). We don't want to + * fill this during closure() since there are lots of contexts that + * pop up but are not used ever again. It also greatly slows down closure(). + * + *This cache makes a huge difference in memory and a little bit in speed. + * For the Java grammar on java.*, it dropped the memory requirements + * at the end from 25M to 16M. We don't store any of the full context + * graphs in the DFA because they are limited to local context only, + * but apparently there's a lot of repetition there as well. We optimize + * the config contexts before storing the config set in the DFA states + * by literally rebuilding them with cached subgraphs only.
+ * + *I tried a cache for use during closure operations, that was + * whacked after each adaptivePredict(). It cost a little bit + * more time I think and doesn't save on the overall footprint + * so it's not worth the complexity.
+ */ + protected readonly sharedContextCache: PredictionContextCache | null = null; + + public constructor(atn: ATN, sharedContextCache: PredictionContextCache | null) { + + this.atn = atn; + this.sharedContextCache = sharedContextCache; + + return this; + } + + public getCachedContext(context: PredictionContext): PredictionContext { + if (this.sharedContextCache === null) { + return context; + } + const visited = new HashMapIf {@code speculative} is {@code true}, this method was called before - * {@link //consume} for the matched character. This method should call - * {@link //consume} before evaluating the predicate to ensure position + * {@link consume} for the matched character. This method should call + * {@link consume} before evaluating the predicate to ensure position * sensitive values, including {@link Lexer//getText}, {@link Lexer//getLine}, * and {@link Lexer}, properly reflect the current * lexer state. This method should restore {@code input} and the simulator * to the original state before returning (i.e. undo the actions made by the - * call to {@link //consume}.
+ * call to {@link consume}. * * @param input The input stream. * @param ruleIndex The rule containing the predicate. @@ -494,11 +547,11 @@ export class LexerATNSimulator extends ATNSimulator { * @param speculative {@code true} if the current index in {@code input} is * one character before the predicate's location. * - * @return {@code true} if the specified predicate evaluates to + * @returns `true` if the specified predicate evaluates to * {@code true}. */ - evaluatePredicate(input, ruleIndex, - predIndex, speculative) { + protected evaluatePredicate(input: CharStream, ruleIndex: number, predIndex: number, + speculative: boolean): boolean { // assume true if no recognizer was provided if (this.recog === null) { return true; @@ -512,6 +565,7 @@ export class LexerATNSimulator extends ATNSimulator { const marker = input.mark(); try { this.consume(input); + return this.recog.sempred(null, ruleIndex, predIndex); } finally { this.column = savedColumn; @@ -521,14 +575,15 @@ export class LexerATNSimulator extends ATNSimulator { } } - captureSimState(settings, input, dfaState) { + protected captureSimState(settings: LexerATNSimulator.SimState, input: CharStream, + dfaState: DFAState | null): void { settings.index = input.index; settings.line = this.line; settings.column = this.column; settings.dfaState = dfaState; } - addDFAEdge(from_, tk, to, configs) { + protected addDFAEdge(from_: DFAState, tk: number, to: DFAState | null, configs?: ATNConfigSet | null): DFAState { if (to === undefined) { to = null; } @@ -558,21 +613,26 @@ export class LexerATNSimulator extends ATNSimulator { return to; } } + // add the edge if (tk < LexerATNSimulator.MIN_DFA_EDGE || tk > LexerATNSimulator.MAX_DFA_EDGE) { // Only track edges within the DFA bounds - return to; + return to!; } + if (LexerATNSimulator.debug) { console.log("EDGE " + from_ + " -> " + to + " upon " + tk); } + if (from_.edges === null) { // make room for tokens 1..n and -1 masquerading as index 0 - from_.edges = []; + from_.edges = new Array(LexerATNSimulator.MAX_DFA_EDGE - LexerATNSimulator.MIN_DFA_EDGE + 1); + from_.edges.fill(null); } + from_.edges[tk - LexerATNSimulator.MIN_DFA_EDGE] = to; // connect - return to; + return to!; } /** @@ -581,66 +641,67 @@ export class LexerATNSimulator extends ATNSimulator { * configuration containing an ATN rule stop state. Later, when * traversing the DFA, we will know which rule to accept. 
*/ - addDFAState(configs) { - const proposed = new DFAState(null, configs); + protected addDFAState(configs: ATNConfigSet): DFAState { + const proposed = new DFAState(configs); let firstConfigWithRuleStopState = null; - for (let i = 0; i < configs.items.length; i++) { - const cfg = configs.items[i]; + for (const cfg of configs.items) { if (cfg.state instanceof RuleStopState) { firstConfigWithRuleStopState = cfg; break; } } + if (firstConfigWithRuleStopState !== null) { proposed.isAcceptState = true; - proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor; + proposed.lexerActionExecutor = (firstConfigWithRuleStopState as LexerATNConfig).lexerActionExecutor; proposed.prediction = this.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]; } + const dfa = this.decisionToDFA[this.mode]; const existing = dfa.states.get(proposed); if (existing !== null) { return existing; } + const newState = proposed; newState.stateNumber = dfa.states.length; configs.setReadonly(true); newState.configs = configs; dfa.states.add(newState); - return newState; - } - - getDFA(mode) { - return this.decisionToDFA[mode]; - } - // Get the text matched so far for the current token. - getText(input) { - // index is first lookahead char, don't include. - return input.getText(this.startIndex, input.index - 1); - } - - consume(input) { - const curChar = input.LA(1); - if (curChar === "\n".charCodeAt(0)) { - this.line += 1; - this.column = 0; - } else { - this.column += 1; - } - input.consume(); + return newState; } +} - getTokenName(tt) { - if (tt === -1) { - return "EOF"; - } else { - return "'" + String.fromCharCode(tt) + "'"; +// eslint-disable-next-line @typescript-eslint/no-namespace +export namespace LexerATNSimulator { + /** + * When we hit an accept state in either the DFA or the ATN, we + * have to notify the character stream to start buffering characters + * via {@link IntStream#mark} and record the current state. The current sim state + * includes the current index into the input, the current line, + * and current character position in that line. Note that the Lexer is + * tracking the starting line and characterization of the token. These + * variables track the "state" of the simulator when it hits an accept state. + * + *We track these variables separately for the DFA and ATN simulation + * because the DFA simulation often has to fail over to the ATN + * simulation. If the ATN simulation fails, we need the DFA to fall + * back to its previously accepted state, if any. If the ATN succeeds, + * then the ATN does the accept and the DFA simulator that invoked it + * can simply return the predicted token type.
+ */ + export class SimState { + public index = -1; + public line = 0; + public column = -1; + public dfaState: DFAState | null = null; + + public reset(): void { + this.index = -1; + this.line = 0; + this.column = -1; + this.dfaState = null; } } } - -LexerATNSimulator.debug = false; -LexerATNSimulator.dfa_debug = false; - -LexerATNSimulator.MIN_DFA_EDGE = 0; -LexerATNSimulator.MAX_DFA_EDGE = 127; // forces unicode to stay in ATN diff --git a/src/atn/LexerAction.d.ts b/src/atn/LexerAction.d.ts deleted file mode 100644 index f4304d9..0000000 --- a/src/atn/LexerAction.d.ts +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -export declare class LexerAction { - public readonly actionType: number; - public readonly isPositionDependent: boolean; - - public constructor(actionType: number); - - public hashCode(): number; - public equals(obj: unknown): boolean; -} diff --git a/src/atn/LexerAction.js b/src/atn/LexerAction.ts similarity index 55% rename from src/atn/LexerAction.js rename to src/atn/LexerAction.ts index 636da1e..bc2aad4 100644 --- a/src/atn/LexerAction.js +++ b/src/atn/LexerAction.ts @@ -4,25 +4,33 @@ * can be found in the LICENSE.txt file in the project root. */ +import { Lexer } from "../Lexer.js"; import { HashCode } from "../misc/HashCode.js"; -export class LexerAction { - constructor(action) { +// TODO: make LexerAction an interface +export abstract class LexerAction { + public readonly actionType: number; + public isPositionDependent: boolean; + + public constructor(action: number) { this.actionType = action; this.isPositionDependent = false; } - hashCode() { + public hashCode(): number { const hash = new HashCode(); this.updateHashCode(hash); + return hash.finish(); } - updateHashCode(hash) { + public updateHashCode(hash: HashCode): void { hash.update(this.actionType); } - equals(other) { + public equals(other: unknown): boolean { return this === other; } + + public abstract execute(lexer: Lexer): void; } diff --git a/src/atn/LexerActionExecutor.js b/src/atn/LexerActionExecutor.ts similarity index 80% rename from src/atn/LexerActionExecutor.js rename to src/atn/LexerActionExecutor.ts index 28b42ba..be3436a 100644 --- a/src/atn/LexerActionExecutor.js +++ b/src/atn/LexerActionExecutor.ts @@ -4,10 +4,18 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerIndexedCustomAction } from './LexerIndexedCustomAction.js'; +/* eslint-disable jsdoc/require-param */ + +import { LexerIndexedCustomAction } from "./LexerIndexedCustomAction.js"; import { HashCode } from "../misc/HashCode.js"; +import { LexerAction } from "./LexerAction.js"; +import { CharStream } from "../CharStream.js"; +import { Lexer } from "../Lexer.js"; + +export class LexerActionExecutor /*implements*/ extends LexerAction { + private lexerActions: LexerAction[]; + private cachedHashCode: number; -export class LexerActionExecutor { /** * Represents an executor for a sequence of lexer actions which traversed during * the matching operation of a lexer rule (token). @@ -16,17 +24,44 @@ export class LexerActionExecutor { * efficiently, ensuring that actions appearing only at the end of the rule do * not cause bloating of the {@link DFA} created for the lexer. */ - constructor(lexerActions) { + public constructor(lexerActions: LexerAction[]) { + super(-1); + this.lexerActions = lexerActions === null ? 
[] : lexerActions; /** - * Caches the result of {@link //hashCode} since the hash code is an element + * Caches the result of {@link hashCode} since the hash code is an element * of the performance-critical {@link LexerATNConfig//hashCode} operation */ this.cachedHashCode = HashCode.hashStuff(lexerActions); // "".join([str(la) for la in + // lexerActions])) return this; } + /** + * Creates a {@link LexerActionExecutor} which executes the actions for + * the input {@code lexerActionExecutor} followed by a specified + * {@code lexerAction}. + * + * @param lexerActionExecutor The executor for actions already traversed by + * the lexer while matching a token within a particular + * {@link LexerATNConfig}. If this is {@code null}, the method behaves as + * though it were an empty executor. + * @param lexerAction The lexer action to execute after the actions + * specified in {@code lexerActionExecutor}. + * + * @returns {LexerActionExecutor} A {@link LexerActionExecutor} for executing the combined actions + * of {@code lexerActionExecutor} and {@code lexerAction}. + */ + public static append(lexerActionExecutor: LexerActionExecutor, lexerAction: LexerAction): LexerActionExecutor { + if (lexerActionExecutor === null) { + return new LexerActionExecutor([lexerAction]); + } + const lexerActions = lexerActionExecutor.lexerActions.concat([lexerAction]); + + return new LexerActionExecutor(lexerActions); + } + /** + * Creates a {@link LexerActionExecutor} which encodes the current offset + * for position-dependent lexer actions. @@ -53,10 +88,10 @@ * @param offset The current offset to assign to all position-dependent * lexer actions which do not already have offsets assigned. * - * @return {LexerActionExecutor} A {@link LexerActionExecutor} which stores input stream offsets + * @returns {LexerActionExecutor} A {@link LexerActionExecutor} which stores input stream offsets * for all position-dependent lexer actions. */ - fixOffsetBeforeMatch(offset) { + public fixOffsetBeforeMatch(offset: number): LexerActionExecutor { let updatedLexerActions = null; for (let i = 0; i < this.lexerActions.length; i++) { if (this.lexerActions[i].isPositionDependent && @@ -94,22 +129,26 @@ * {@link IntStream//seek} to set the {@code input} position to the beginning * of the token. 
*/ - execute(lexer, input, startIndex) { + public execute(lexer: Lexer, input?: CharStream, startIndex?: number): void { + if (input === undefined || startIndex === undefined) { + return; + } + let requiresSeek = false; const stopIndex = input.index; try { - for (let i = 0; i < this.lexerActions.length; i++) { - let lexerAction = this.lexerActions[i]; + for (const lexerAction of this.lexerActions) { + let action = lexerAction; if (lexerAction instanceof LexerIndexedCustomAction) { const offset = lexerAction.offset; input.seek(startIndex + offset); - lexerAction = lexerAction.action; + action = lexerAction.action; requiresSeek = (startIndex + offset) !== stopIndex; } else if (lexerAction.isPositionDependent) { input.seek(stopIndex); requiresSeek = false; } - lexerAction.execute(lexer); + action.execute(lexer); } } finally { if (requiresSeek) { @@ -118,22 +157,18 @@ export class LexerActionExecutor { } } - hashCode() { + public override hashCode(): number { return this.cachedHashCode; } - updateHashCode(hash) { - hash.update(this.cachedHashCode); - } - - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerActionExecutor)) { return false; - } else if (this.cachedHashCode != other.cachedHashCode) { + } else if (this.cachedHashCode !== other.cachedHashCode) { return false; - } else if (this.lexerActions.length != other.lexerActions.length) { + } else if (this.lexerActions.length !== other.lexerActions.length) { return false; } else { const numActions = this.lexerActions.length; @@ -142,30 +177,9 @@ export class LexerActionExecutor { return false; } } + return true; } } - /** - * Creates a {@link LexerActionExecutor} which executes the actions for - * the input {@code lexerActionExecutor} followed by a specified - * {@code lexerAction}. - * - * @param lexerActionExecutor The executor for actions already traversed by - * the lexer while matching a token within a particular - * {@link LexerATNConfig}. If this is {@code null}, the method behaves as - * though it were an empty executor. - * @param lexerAction The lexer action to execute after the actions - * specified in {@code lexerActionExecutor}. - * - * @return {LexerActionExecutor} A {@link LexerActionExecutor} for executing the combine actions - * of {@code lexerActionExecutor} and {@code lexerAction}. - */ - static append(lexerActionExecutor, lexerAction) { - if (lexerActionExecutor === null) { - return new LexerActionExecutor([lexerAction]); - } - const lexerActions = lexerActionExecutor.lexerActions.concat([lexerAction]); - return new LexerActionExecutor(lexerActions); - } } diff --git a/src/atn/LexerActionType.js b/src/atn/LexerActionType.ts similarity index 90% rename from src/atn/LexerActionType.js rename to src/atn/LexerActionType.ts index f683920..fff08b5 100644 --- a/src/atn/LexerActionType.js +++ b/src/atn/LexerActionType.ts @@ -4,6 +4,8 @@ * can be found in the LICENSE.txt file in the project root. */ +/* eslint-disable @typescript-eslint/naming-convention */ + export const LexerActionType = { // The type of a {@link LexerChannelAction} action. CHANNEL: 0, @@ -20,5 +22,5 @@ export const LexerActionType = { //The type of a {@link LexerSkipAction} action. SKIP: 6, //The type of a {@link LexerTypeAction} action. 
- TYPE: 7 -}; + TYPE: 7, +} as const; diff --git a/src/atn/LexerChannelAction.js b/src/atn/LexerChannelAction.ts similarity index 69% rename from src/atn/LexerChannelAction.js rename to src/atn/LexerChannelAction.ts index 8e51c52..ee16312 100644 --- a/src/atn/LexerChannelAction.js +++ b/src/atn/LexerChannelAction.ts @@ -4,34 +4,42 @@ * can be found in the LICENSE.txt file in the project root. */ +/* eslint-disable jsdoc/require-param */ + import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; /** * Implements the {@code channel} lexer action by calling * {@link Lexer//setChannel} with the assigned channel. * Constructs a new {@code channel} action with the specified channel value. + * * @param channel The channel value to pass to {@link Lexer//setChannel} */ export class LexerChannelAction extends LexerAction { - constructor(channel) { + public readonly channel: number; + + public constructor(channel: number) { super(LexerActionType.CHANNEL); this.channel = channel; } /** *This action is implemented by calling {@link Lexer//setChannel} with the - * value provided by {@link //getChannel}.
+ * value provided by {@link getChannel}. */ - execute(lexer) { + public execute(lexer: Lexer): void { + // eslint-disable-next-line no-underscore-dangle lexer._channel = this.channel; } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.channel); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerChannelAction)) { @@ -41,7 +49,7 @@ export class LexerChannelAction extends LexerAction { } } - toString() { + public override toString(): string { return "channel(" + this.channel + ")"; } } diff --git a/src/atn/LexerCustomAction.js b/src/atn/LexerCustomAction.ts similarity index 79% rename from src/atn/LexerCustomAction.js rename to src/atn/LexerCustomAction.ts index babcf3f..6feae99 100644 --- a/src/atn/LexerCustomAction.js +++ b/src/atn/LexerCustomAction.ts @@ -4,8 +4,12 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +/* eslint-disable jsdoc/require-param */ + +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; /** * Executes a custom lexer action by calling {@link Recognizer//action} with the @@ -18,6 +22,9 @@ import { LexerAction } from "./LexerAction.js"; * command argument could not be evaluated when the grammar was compiled. */ export class LexerCustomAction extends LexerAction { + private ruleIndex: number; + private actionIndex: number; + /** * Constructs a custom lexer action with the specified rule and action * indexes. @@ -27,7 +34,7 @@ export class LexerCustomAction extends LexerAction { * @param actionIndex The action index to use for calls to * {@link Recognizer//action}. */ - constructor(ruleIndex, actionIndex) { + public constructor(ruleIndex: number, actionIndex: number) { super(LexerActionType.CUSTOM); this.ruleIndex = ruleIndex; this.actionIndex = actionIndex; @@ -38,15 +45,15 @@ export class LexerCustomAction extends LexerAction { *Custom actions are implemented by calling {@link Lexer//action} with the * appropriate rule and action indexes.
*/ - execute(lexer) { + public override execute(lexer: Lexer): void { lexer.action(null, this.ruleIndex, this.actionIndex); } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.ruleIndex, this.actionIndex); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerCustomAction)) { diff --git a/src/atn/LexerIndexedCustomAction.js b/src/atn/LexerIndexedCustomAction.ts similarity index 80% rename from src/atn/LexerIndexedCustomAction.js rename to src/atn/LexerIndexedCustomAction.ts index 263d062..0933d7d 100644 --- a/src/atn/LexerIndexedCustomAction.js +++ b/src/atn/LexerIndexedCustomAction.ts @@ -4,6 +4,12 @@ * can be found in the LICENSE.txt file in the project root. */ +/* eslint-disable jsdoc/require-param */ + +import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; + /** * This implementation of {@link LexerAction} is used for tracking input offsets * for position-dependent actions within a {@link LexerActionExecutor}. @@ -27,10 +33,11 @@ * input {@link CharStream}. */ -import { LexerAction } from "./LexerAction.js"; - export class LexerIndexedCustomAction extends LexerAction { - constructor(offset, action) { + public readonly offset: number; + public readonly action: LexerAction; + + public constructor(offset: number, action: LexerAction) { super(action.actionType); this.offset = offset; this.action = action; @@ -38,19 +45,19 @@ export class LexerIndexedCustomAction extends LexerAction { } /** - *This method calls {@link //execute} on the result of {@link //getAction} + *
This method calls {@link execute} on the result of {@link getAction} * using the provided {@code lexer}.
*/ - execute(lexer) { + public execute(lexer: Lexer): void { // assume the input stream position was properly set by the calling code this.action.execute(lexer); } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.offset, this.action); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerIndexedCustomAction)) { diff --git a/src/atn/LexerModeAction.js b/src/atn/LexerModeAction.ts similarity index 64% rename from src/atn/LexerModeAction.js rename to src/atn/LexerModeAction.ts index a0318d3..0a1f437 100644 --- a/src/atn/LexerModeAction.js +++ b/src/atn/LexerModeAction.ts @@ -4,32 +4,38 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +/* eslint-disable jsdoc/require-param */ + +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; /** * Implements the {@code mode} lexer action by calling {@link Lexer//mode} with * the assigned mode */ export class LexerModeAction extends LexerAction { - constructor(mode) { + private readonly mode: number; + + public constructor(mode: number) { super(LexerActionType.MODE); this.mode = mode; } /** *This action is implemented by calling {@link Lexer//mode} with the - * value provided by {@link //getMode}.
+ * value provided by {@link getMode}. */ - execute(lexer) { + public override execute(lexer: Lexer): void { lexer.mode(this.mode); } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.mode); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerModeAction)) { @@ -39,7 +45,7 @@ export class LexerModeAction extends LexerAction { } } - toString() { + public override toString(): string { return "mode(" + this.mode + ")"; } } diff --git a/src/atn/LexerMoreAction.js b/src/atn/LexerMoreAction.ts similarity index 58% rename from src/atn/LexerMoreAction.js rename to src/atn/LexerMoreAction.ts index 9f130f7..07a6322 100644 --- a/src/atn/LexerMoreAction.js +++ b/src/atn/LexerMoreAction.ts @@ -4,30 +4,34 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +/* eslint-disable jsdoc/require-param */ + +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; /** * Implements the {@code more} lexer action by calling {@link Lexer//more}. * *The {@code more} command does not have any parameters, so this action is - * implemented as a singleton instance exposed by {@link //INSTANCE}.
+ * implemented as a singleton instance exposed by {@link INSTANCE}. */ export class LexerMoreAction extends LexerAction { - constructor() { + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly INSTANCE = new LexerMoreAction(); + + public constructor() { super(LexerActionType.MORE); } /** * This action is implemented by calling {@link Lexer//more}.
 */ - execute(lexer) { + public override execute(lexer: Lexer): void { lexer.more(); } - toString() { + public override toString(): string { return "more"; } } - -LexerMoreAction.INSTANCE = new LexerMoreAction(); diff --git a/src/atn/LexerPopModeAction.js b/src/atn/LexerPopModeAction.ts similarity index 59% rename from src/atn/LexerPopModeAction.js rename to src/atn/LexerPopModeAction.ts index 14b4003..7d20f4d 100644 --- a/src/atn/LexerPopModeAction.js +++ b/src/atn/LexerPopModeAction.ts @@ -4,30 +4,34 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +/* eslint-disable jsdoc/require-param */ + +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; /** * Implements the {@code popMode} lexer action by calling {@link Lexer//popMode}. * * The {@code popMode} command does not have any parameters, so this action is - * implemented as a singleton instance exposed by {@link //INSTANCE}.
+ * implemented as a singleton instance exposed by {@link INSTANCE}. */ export class LexerPopModeAction extends LexerAction { - constructor() { + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly INSTANCE = new LexerPopModeAction(); + + public constructor() { super(LexerActionType.POP_MODE); } /** *This action is implemented by calling {@link Lexer//popMode}.
*/ - execute(lexer) { + public override execute(lexer: Lexer): void { lexer.popMode(); } - toString() { + public override toString(): string { return "popMode"; } } - -LexerPopModeAction.INSTANCE = new LexerPopModeAction(); diff --git a/src/atn/LexerPushModeAction.js b/src/atn/LexerPushModeAction.ts similarity index 65% rename from src/atn/LexerPushModeAction.js rename to src/atn/LexerPushModeAction.ts index 593c1a1..4fe80af 100644 --- a/src/atn/LexerPushModeAction.js +++ b/src/atn/LexerPushModeAction.ts @@ -4,32 +4,38 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +/* eslint-disable jsdoc/require-param */ + +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; /** * Implements the {@code pushMode} lexer action by calling * {@link Lexer//pushMode} with the assigned mode */ export class LexerPushModeAction extends LexerAction { - constructor(mode) { + private readonly mode: number; + + public constructor(mode: number) { super(LexerActionType.PUSH_MODE); this.mode = mode; } /** *This action is implemented by calling {@link Lexer//pushMode} with the - * value provided by {@link //getMode}.
+ * value provided by {@link getMode}. */ - execute(lexer) { + public override execute(lexer: Lexer): void { lexer.pushMode(this.mode); } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.mode); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerPushModeAction)) { @@ -39,7 +45,7 @@ export class LexerPushModeAction extends LexerAction { } } - toString() { + public override toString(): string { return "pushMode(" + this.mode + ")"; } } diff --git a/src/atn/LexerSkipAction.js b/src/atn/LexerSkipAction.ts similarity index 53% rename from src/atn/LexerSkipAction.js rename to src/atn/LexerSkipAction.ts index 37df5f9..7e3975f 100644 --- a/src/atn/LexerSkipAction.js +++ b/src/atn/LexerSkipAction.ts @@ -4,28 +4,30 @@ * can be found in the LICENSE.txt file in the project root. */ -import { LexerActionType } from "../atn/LexerActionType.js"; +import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; /** * Implements the {@code skip} lexer action by calling {@link Lexer//skip}. * *The {@code skip} command does not have any parameters, so this action is - * implemented as a singleton instance exposed by {@link //INSTANCE}.
+ * implemented as a singleton instance exposed by {@link INSTANCE}. */ export class LexerSkipAction extends LexerAction { - constructor() { + /** Provides a singleton instance of this parameter-less lexer action. */ + // eslint-disable-next-line @typescript-eslint/naming-convention + public static readonly INSTANCE = new LexerSkipAction(); + + public constructor() { super(LexerActionType.SKIP); } - execute(lexer) { + public execute(lexer: Lexer): void { lexer.skip(); } - toString() { + public override toString(): string { return "skip"; } } - -// Provides a singleton instance of this parameterless lexer action. -LexerSkipAction.INSTANCE = new LexerSkipAction(); diff --git a/src/atn/LexerTypeAction.js b/src/atn/LexerTypeAction.ts similarity index 65% rename from src/atn/LexerTypeAction.js rename to src/atn/LexerTypeAction.ts index 041e446..705de67 100644 --- a/src/atn/LexerTypeAction.js +++ b/src/atn/LexerTypeAction.ts @@ -6,6 +6,8 @@ import { LexerActionType } from "./LexerActionType.js"; import { LexerAction } from "./LexerAction.js"; +import { Lexer } from "../Lexer.js"; +import { HashCode } from "../misc/HashCode.js"; /** * Implements the {@code type} lexer action by calling {@link Lexer//setType} @@ -13,20 +15,23 @@ import { LexerAction } from "./LexerAction.js"; */ export class LexerTypeAction extends LexerAction { - constructor(type) { + public readonly type: number; + + public constructor(type: number) { super(LexerActionType.TYPE); this.type = type; } - execute(lexer) { - lexer.type = this.type; + public override execute(lexer: Lexer): void { + // eslint-disable-next-line no-underscore-dangle + lexer._type = this.type; } - updateHashCode(hash) { + public override updateHashCode(hash: HashCode): void { hash.update(this.actionType, this.type); } - equals(other) { + public override equals(other: unknown): boolean { if (this === other) { return true; } else if (!(other instanceof LexerTypeAction)) { @@ -36,7 +41,7 @@ export class LexerTypeAction extends LexerAction { } } - toString() { + public override toString(): string { return "type(" + this.type + ")"; } } diff --git a/src/atn/LoopEndState.d.ts b/src/atn/LoopEndState.d.ts deleted file mode 100644 index 2b545e1..0000000 --- a/src/atn/LoopEndState.d.ts +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { ATNState } from "./ATNState.js"; - -/** - * Mark the end of a * or + loop - */ -export class LoopEndState extends ATNState { -} diff --git a/src/atn/LoopEndState.js b/src/atn/LoopEndState.ts similarity index 81% rename from src/atn/LoopEndState.js rename to src/atn/LoopEndState.ts index b4c490c..866de40 100644 --- a/src/atn/LoopEndState.js +++ b/src/atn/LoopEndState.ts @@ -11,7 +11,9 @@ import { ATNStateType } from "./ATNStateType.js"; * Mark the end of a * or + loop */ export class LoopEndState extends ATNState { - get stateType() { + public loopBackState: ATNState | null = null; + + public override get stateType(): number { return ATNStateType.LOOP_END; } diff --git a/src/atn/NotSetTransition.d.ts b/src/atn/NotSetTransition.d.ts deleted file mode 100644 index 1813581..0000000 --- a/src/atn/NotSetTransition.d.ts +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. 
- */ - -import { ATNState } from "../atn/ATNState.js"; -import { IntervalSet } from "../misc/IntervalSet.js"; -import { SetTransition } from "./SetTransition.js"; - -export declare class NotSetTransition extends SetTransition { - public constructor(target: ATNState, set: IntervalSet); - - public matches(symbol: number, minVocabSymbol: number, maxVocabSymbol: number): boolean; - public toString(): string; -} diff --git a/src/atn/NotSetTransition.js b/src/atn/NotSetTransition.ts similarity index 55% rename from src/atn/NotSetTransition.js rename to src/atn/NotSetTransition.ts index b7f1b74..3e84a36 100644 --- a/src/atn/NotSetTransition.js +++ b/src/atn/NotSetTransition.ts @@ -4,21 +4,26 @@ * can be found in the LICENSE.txt file in the project root. */ +import { IntervalSet } from "../misc/IntervalSet.js"; +import { ATNState } from "./ATNState.js"; import { SetTransition } from "./SetTransition.js"; import { TransitionType } from "./TransitionType.js"; export class NotSetTransition extends SetTransition { - constructor(target, set) { + public constructor(target: ATNState, set: IntervalSet) { super(target, set); - this.serializationType = TransitionType.NOT_SET; } - matches(symbol, minVocabSymbol, maxVocabSymbol) { + public override get serializationType(): number { + return TransitionType.NOT_SET; + } + + public override matches(symbol: number, minVocabSymbol: number, maxVocabSymbol: number): boolean { return symbol >= minVocabSymbol && symbol <= maxVocabSymbol && !super.matches(symbol, minVocabSymbol, maxVocabSymbol); } - toString() { - return '~' + super.toString(); + public override toString(): string { + return "~" + super.toString(); } } diff --git a/src/atn/OrderedATNConfigSet.js b/src/atn/OrderedATNConfigSet.ts similarity index 93% rename from src/atn/OrderedATNConfigSet.js rename to src/atn/OrderedATNConfigSet.ts index 5ecc8df..a33e765 100644 --- a/src/atn/OrderedATNConfigSet.js +++ b/src/atn/OrderedATNConfigSet.ts @@ -8,7 +8,7 @@ import { ATNConfigSet } from "./ATNConfigSet.js"; import { HashSet } from "../misc/HashSet.js"; export class OrderedATNConfigSet extends ATNConfigSet { - constructor() { + public constructor() { super(); this.configLookup = new HashSet(); } diff --git a/src/atn/ParserATNSimulator.d.ts b/src/atn/ParserATNSimulator.d.ts deleted file mode 100644 index f26892e..0000000 --- a/src/atn/ParserATNSimulator.d.ts +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. 
- */ - -import { Parser } from "../Parser.js"; -import { TokenStream } from "../TokenStream.js"; -import { ParserRuleContext } from "../ParserRuleContext.js"; -import { DFA } from "../dfa/DFA.js"; -import { ATN } from "./ATN.js"; -import { ATNSimulator } from "./ATNSimulator.js"; -import { PredictionContextCache } from "./PredictionContextCache.js"; -import { PredictionMode } from "./PredictionMode.js"; - -export declare class ParserATNSimulator extends ATNSimulator { - public predictionMode: PredictionMode; - public decisionToDFA: DFA[]; - public atn: ATN; - public debug?: boolean; - - // eslint-disable-next-line @typescript-eslint/naming-convention - public trace_atn_sim?: boolean; - - public constructor(recog: Parser, atn: ATN, decisionToDFA: DFA[], sharedContextCache: PredictionContextCache); - public adaptivePredict(input: TokenStream, decision: number, outerContext: ParserRuleContext): number; -} diff --git a/src/atn/ParserATNSimulator.js b/src/atn/ParserATNSimulator.ts similarity index 77% rename from src/atn/ParserATNSimulator.js rename to src/atn/ParserATNSimulator.ts index 0749f4c..3a6b134 100644 --- a/src/atn/ParserATNSimulator.js +++ b/src/atn/ParserATNSimulator.ts @@ -4,34 +4,51 @@ * can be found in the LICENSE.txt file in the project root. */ -import { NoViableAltException } from '../NoViableAltException.js'; -import { Token } from '../Token.js'; -import { DFAState } from '../dfa/DFAState.js'; -import { PredPrediction } from '../dfa/PredPrediction.js'; +/* eslint-disable @typescript-eslint/naming-convention, no-underscore-dangle, jsdoc/require-returns */ +/* eslint-disable jsdoc/require-param */ + +import { NoViableAltException } from "../NoViableAltException.js"; +import { Token } from "../Token.js"; +import { DFAState } from "../dfa/DFAState.js"; import { BitSet } from "../misc/BitSet.js"; import { HashSet } from "../misc/HashSet.js"; -import { Interval } from '../misc/Interval.js'; +import { Interval } from "../misc/Interval.js"; import { DoubleDict } from "../utils/DoubleDict.js"; -import { arrayToString } from "../utils/arrayToString.js"; -import { ATN } from './ATN.js'; -import { ATNConfig } from './ATNConfig.js'; -import { ATNConfigSet } from './ATNConfigSet.js'; -import { ATNSimulator } from './ATNSimulator.js'; +import { ATN } from "./ATN.js"; +import { ATNConfig } from "./ATNConfig.js"; +import { ATNConfigSet } from "./ATNConfigSet.js"; +import { ATNSimulator } from "./ATNSimulator.js"; import { ATNStateType } from "./ATNStateType.js"; -import { ActionTransition } from './ActionTransition.js'; +import { ActionTransition } from "./ActionTransition.js"; import { AtomTransition } from "./AtomTransition.js"; -import { NotSetTransition } from './NotSetTransition.js'; -import { PredictionContext } from './PredictionContext.js'; -import { predictionContextFromRuleContext } from './PredictionContextUtils.js'; -import { PredictionMode } from './PredictionMode.js'; -import { RuleContext } from './RuleContext.js'; -import { RuleStopState } from './RuleStopState.js'; -import { RuleTransition } from './RuleTransition.js'; -import { SemanticContext } from './SemanticContext.js'; -import { SetTransition } from './SetTransition.js'; -import { SingletonPredictionContext } from './SingletonPredictionContext.js'; -import { TransitionType } from './TransitionType.js'; -import { Vocabulary } from '../Vocabulary.js'; +import { NotSetTransition } from "./NotSetTransition.js"; +import { PredictionContext } from "./PredictionContext.js"; +import { predictionContextFromRuleContext } from 
"./PredictionContextUtils.js"; +import { PredictionMode } from "./PredictionMode.js"; +import { RuleStopState } from "./RuleStopState.js"; +import { RuleTransition } from "./RuleTransition.js"; +import { SemanticContext } from "./SemanticContext.js"; +import { SetTransition } from "./SetTransition.js"; +import { SingletonPredictionContext } from "./SingletonPredictionContext.js"; +import { TransitionType } from "./TransitionType.js"; +import { Vocabulary } from "../Vocabulary.js"; + +import { arrayToString } from "../utils/helpers.js"; +import { Parser } from "../Parser.js"; +import { DFA } from "../dfa/DFA.js"; +import { PredictionContextCache } from "./PredictionContextCache.js"; +import { TokenStream } from "../TokenStream.js"; +import { ParserRuleContext } from "../ParserRuleContext.js"; +import { DecisionState } from "./DecisionState.js"; +import { RuleContext } from "../RuleContext.js"; +import { ATNState } from "./ATNState.js"; +import { Transition } from "./Transition.js"; + +import type { EpsilonTransition } from "./EpsilonTransition.js"; +import type { StarLoopEntryState } from "./StarLoopEntryState.js"; +import type { BlockStartState } from "./BlockStartState.js"; +import type { PrecedencePredicateTransition } from "./PrecedencePredicateTransition.js"; +import type { PredicateTransition } from "./PredicateTransition.js"; /** * The embodiment of the adaptive LL(*), ALL(*), parsing strategy. @@ -178,7 +195,7 @@ import { Vocabulary } from '../Vocabulary.js'; ** All instances of the same parser share the same decision DFAs through a * static field. Each instance gets its own ATN simulator but they share the - * same {@link //decisionToDFA} field. They also share a + * same {@link decisionToDFA} field. They also share a * {@link PredictionContextCache} object that makes sure that all * {@link PredictionContext} objects are shared among the DFA states. This makes * a big size difference.
@@ -187,15 +204,15 @@ import { Vocabulary } from '../Vocabulary.js'; * THREAD SAFETY * *- * The {@link ParserATNSimulator} locks on the {@link //decisionToDFA} field when - * it adds a new DFA object to that array. {@link //addDFAEdge} + * The {@link ParserATNSimulator} locks on the {@link decisionToDFA} field when + * it adds a new DFA object to that array. {@link addDFAEdge} * locks on the DFA for the current decision when setting the - * {@link DFAState//edges} field. {@link //addDFAState} locks on + * {@link DFAState//edges} field. {@link addDFAState} locks on * the DFA for the current decision when looking up a DFA state to see if it * already exists. We must make sure that all requests to add DFA states that * are equivalent result in the same shared DFA object. This is because lots of * threads will be trying to update the DFA at once. The - * {@link //addDFAState} method also locks inside the DFA lock + * {@link addDFAState} method also locks inside the DFA lock * but this time on the shared context cache when it rebuilds the * configurations' {@link PredictionContext} objects using cached * subgraphs/nodes. No other locking occurs, even during DFA simulation. This is @@ -206,7 +223,7 @@ import { Vocabulary } from '../Vocabulary.js'; * targets. The DFA simulator will either find {@link DFAState//edges} to be * {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or * {@code dfa.edges[t]} to be non-null. The - * {@link //addDFAEdge} method could be racing to set the field + * {@link addDFAEdge} method could be racing to set the field * but in either case the DFA simulator works; if {@code null}, and requests ATN * simulation. It could also race trying to get {@code dfa.edges[t]}, but either * way it will work because it's not doing a test and set operation.
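
To make the sharing scheme described above concrete, here is a minimal TypeScript sketch of how a generated parser typically wires it up. `MyParser` and `serializedATN` are hypothetical stand-ins for generated code, the import list assumes the usual package-root re-exports, and the abstract `Parser` members (rule names, vocabulary, and so on) that a real generated file implements are omitted for brevity:

import { ATN, ATNDeserializer, DecisionState, DFA, Parser, ParserATNSimulator, PredictionContextCache, TokenStream } from "antlr4ng";

// Stand-in for the ATN data emitted by the ANTLR tool (hypothetical).
declare const serializedATN: number[];

export class MyParser extends Parser {
    // Shared per parser class: every MyParser instance reuses these.
    private static readonly sharedContextCache = new PredictionContextCache();
    private static readonly atn: ATN = new ATNDeserializer().deserialize(serializedATN);
    private static readonly decisionsToDFA = MyParser.atn.decisionToState.map(
        (ds: DecisionState, index: number) => new DFA(ds, index));

    public constructor(input: TokenStream) {
        super(input);

        // Per instance: each parser owns its own simulator, but every
        // simulator points at the same DFA array and prediction context
        // cache, which is exactly the sharing the comment above describes.
        this.interpreter = new ParserATNSimulator(this, MyParser.atn,
            MyParser.decisionsToDFA, MyParser.sharedContextCache);
    }
}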
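Since full LL prediction is the expensive path, the SLL-plus-{@link BailErrorStrategy} advice in the next hunk is usually applied as a two-stage parse: attempt SLL with bail-out semantics first and rerun with LL only if that fails. A hedged sketch, assuming hypothetical generated classes MyLexer/MyParser with an entry rule `program`, that `CharStream.fromString` is the stream factory, and that the bail strategy surfaces failures as ParseCancellationException as in the Java runtime:

import { BailErrorStrategy, CharStream, CommonTokenStream, DefaultErrorStrategy, ParseCancellationException, PredictionMode } from "antlr4ng";

// Hypothetical generated lexer/parser for your grammar.
import { MyLexer } from "./generated/MyLexer.js";
import { MyParser } from "./generated/MyParser.js";

export const parseTwoStage = (input: string) => {
    const lexer = new MyLexer(CharStream.fromString(input));
    const tokens = new CommonTokenStream(lexer);
    const parser = new MyParser(tokens);

    // Stage 1: fast SLL prediction, aborting on the first syntax error.
    parser.interpreter.predictionMode = PredictionMode.SLL;
    parser.errorHandler = new BailErrorStrategy();
    try {
        return parser.program();
    } catch (e) {
        if (e instanceof ParseCancellationException) {
            // Stage 2: SLL failed, so rewind the token stream and retry with
            // full LL prediction (slower, but complete) and normal recovery.
            tokens.seek(0);
            parser.reset();
            parser.errorHandler = new DefaultErrorStrategy();
            parser.interpreter.predictionMode = PredictionMode.LL;

            return parser.program();
        }

        throw e;
    }
};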
@@ -222,7 +239,8 @@ import { Vocabulary } from '../Vocabulary.js'; * mode with the {@link BailErrorStrategy}: * *- * parser.{@link Parser//getInterpreter() getInterpreter()}.{@link //setPredictionMode setPredictionMode}{@code (}{@link PredictionMode//SLL}{@code )}; + * parser.{@link Parser//getInterpreter() getInterpreter()}.{@link setPredictionMode setPredictionMode}{@code (} + * {@link PredictionMode//SLL}{@code )}; * parser.{@link Parser//setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}()); ** @@ -261,44 +279,68 @@ import { Vocabulary } from '../Vocabulary.js'; * the input. */ export class ParserATNSimulator extends ATNSimulator { - constructor(parser, atn, decisionToDFA, sharedContextCache) { + public static debug?: boolean; + + // eslint-disable-next-line @typescript-eslint/naming-convention + public static trace_atn_sim = false; + public static debug_add = false; + public static debug_closure = false; + + public static dfa_debug = false; + public static retry_debug = false; + + /** SLL, LL, or LL + exact ambig detection? */ + public predictionMode: number; + public readonly decisionToDFA: DFA[]; + + protected readonly parser: Parser; + + /** + * Each prediction operation uses a cache for merge of prediction contexts. + * Don't keep around as it wastes huge amounts of memory. DoubleKeyMap + * isn't synchronized but we're ok since two threads shouldn't reuse same + * parser/atn sim object because it can only handle one input at a time. + * This maps graphs a and b to merged result c. (a,b)->c. We can avoid + * the merge if we ever see a and b again. Note that (b,a)->c should + * also be examined during cache lookup. + */ + protected mergeCache: DoubleDict
When {@code lookToEndOfRule} is true, this method uses - * {@link ATN//nextTokens} for each configuration in {@code configs} which is + * {@link ATN//nextTokens} for each configuration in `configs` which is * not already in a rule stop state to see if a rule stop state is reachable * from the configuration via epsilon-only transitions.
* * @param configs the configuration set to update * @param lookToEndOfRule when true, this method checks for rule stop states * reachable by epsilon-only transitions from each configuration in - * {@code configs}. + * `configs`. * - * @return {@code configs} if all configurations in {@code configs} are in a + * @returns `configs` if all configurations in `configs` are in a * rule stop state, otherwise return a new configuration set containing only - * the configurations from {@code configs} which are in a rule stop state + * the configurations from `configs` which are in a rule stop state */ - removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule) { + protected removeAllConfigsNotInRuleStopState(configs: ATNConfigSet, lookToEndOfRule: boolean): ATNConfigSet { if (PredictionMode.allConfigsInRuleStopStates(configs)) { return configs; } + const result = new ATNConfigSet(configs.fullCtx); - for (let i = 0; i < configs.items.length; i++) { - const config = configs.items[i]; + for (const config of configs.items) { if (config.state instanceof RuleStopState) { result.add(config, this.mergeCache); continue; } + if (lookToEndOfRule && config.state.epsilonOnlyTransitions) { const nextTokens = this.atn.nextTokens(config.state); if (nextTokens.contains(Token.EPSILON)) { @@ -868,30 +971,33 @@ export class ParserATNSimulator extends ATNSimulator { } } } + return result; } - computeStartState(p, ctx, fullCtx) { + protected computeStartState(p: ATNState, ctx: RuleContext, fullCtx: boolean): ATNConfigSet { // always at least the implicit call to start rule const initialContext = predictionContextFromRuleContext(this.atn, ctx); const configs = new ATNConfigSet(fullCtx); - if (this.trace_atn_sim) { - console.log("computeStartState from ATN state " + p + " initialContext=" + initialContext.toString(this.parser)); + if (ParserATNSimulator.trace_atn_sim) { + console.log("computeStartState from ATN state " + p + " initialContext=" + + initialContext.toString(this.parser)); } for (let i = 0; i < p.transitions.length; i++) { const target = p.transitions[i].target; const c = new ATNConfig({ state: target, alt: i + 1, context: initialContext }, null); - const closureBusy = new HashSet(); + const closureBusy = new HashSet* The default implementation of this method uses the following @@ -1083,7 +1194,7 @@ export class ParserATNSimulator extends ATNSimulator { * the parser. Specifically, this could occur if the only configuration * capable of successfully parsing to the end of the decision rule is * blocked by a semantic predicate. By choosing this alternative within - * {@link //adaptivePredict} instead of throwing a + * {@link adaptivePredict} instead of throwing a * {@link NoViableAltException}, the resulting * {@link FailedPredicateException} in the parser will identify the specific * predicate which is preventing the parser from successfully parsing the @@ -1092,22 +1203,24 @@ export class ParserATNSimulator extends ATNSimulator { *
* * @param configs The ATN configurations which were valid immediately before - * the {@link //ERROR} state was reached + * the {@link ERROR} state was reached * @param outerContext The is the \gamma_0 initial parser context from the paper * or the parser stack at the instant before prediction commences. * - * @return The value to return from {@link //adaptivePredict}, or + * @returns The value to return from {@link adaptivePredict}, or * {@link ATN//INVALID_ALT_NUMBER} if a suitable alternative was not - * identified and {@link //adaptivePredict} should report an error instead + * identified and {@link adaptivePredict} should report an error instead */ - getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs, outerContext) { - const cfgs = this.splitAccordingToSemanticValidity(configs, outerContext); - const semValidConfigs = cfgs[0]; - const semInvalidConfigs = cfgs[1]; + protected getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs: ATNConfigSet, + outerContext: ParserRuleContext): number { + const splitConfigs = this.splitAccordingToSemanticValidity(configs, outerContext); + const semValidConfigs = splitConfigs[0]; + const semInvalidConfigs = splitConfigs[1]; let alt = this.getAltThatFinishedDecisionEntryRule(semValidConfigs); if (alt !== ATN.INVALID_ALT_NUMBER) { // semantically/syntactically viable path exists return alt; } + // Is there a syntactically valid path with a failed pred? if (semInvalidConfigs.items.length > 0) { alt = this.getAltThatFinishedDecisionEntryRule(semInvalidConfigs); @@ -1115,14 +1228,14 @@ export class ParserATNSimulator extends ATNSimulator { return alt; } } + return ATN.INVALID_ALT_NUMBER; } - getAltThatFinishedDecisionEntryRule(configs) { + protected getAltThatFinishedDecisionEntryRule(configs: ATNConfigSet): number { const alts = []; - for (let i = 0; i < configs.items.length; i++) { - const c = configs.items[i]; - if (c.reachesIntoOuterContext > 0 || ((c.state instanceof RuleStopState) && c.context.hasEmptyPath())) { + for (const c of configs.items) { + if (c.reachesIntoOuterContext > 0 || ((c.state instanceof RuleStopState) && c.context!.hasEmptyPath())) { if (alts.indexOf(c.alt) < 0) { alts.push(c.alt); } @@ -1131,7 +1244,7 @@ export class ParserATNSimulator extends ATNSimulator { if (alts.length === 0) { return ATN.INVALID_ALT_NUMBER; } else { - return Math.min.apply(null, alts); + return Math.min(...alts); } } @@ -1143,12 +1256,14 @@ export class ParserATNSimulator extends ATNSimulator { * Create a new set so as not to alter the incoming parameter. * * Assumption: the input stream has been restored to the starting point - * prediction, which is where predicates need to evaluate.*/ - splitAccordingToSemanticValidity(configs, outerContext) { + * prediction, which is where predicates need to evaluate. 
+ */ + protected splitAccordingToSemanticValidity(configs: ATNConfigSet, + outerContext: ParserRuleContext): [ATNConfigSet, ATNConfigSet] { const succeeded = new ATNConfigSet(configs.fullCtx); const failed = new ATNConfigSet(configs.fullCtx); - for (let i = 0; i < configs.items.length; i++) { - const c = configs.items[i]; + + for (const c of configs.items) { if (c.semanticContext !== SemanticContext.NONE) { const predicateEvaluationResult = c.semanticContext.evaluate(this.parser, outerContext); if (predicateEvaluationResult) { @@ -1160,6 +1275,7 @@ export class ParserATNSimulator extends ATNSimulator { succeeded.add(c); } } + return [succeeded, failed]; } @@ -1170,10 +1286,11 @@ export class ParserATNSimulator extends ATNSimulator { * then we stop at the first predicate that evaluates to true. This * includes pairs with null predicates. */ - evalSemanticContext(predPredictions, outerContext, complete) { + protected evalSemanticContext(predPredictions: DFAState.PredPrediction[], outerContext: ParserRuleContext, + complete: boolean): BitSet { const predictions = new BitSet(); - for (let i = 0; i < predPredictions.length; i++) { - const pair = predPredictions[i]; + + for (const pair of predPredictions) { if (pair.pred === SemanticContext.NONE) { predictions.set(pair.alt); if (!complete) { @@ -1182,11 +1299,11 @@ export class ParserATNSimulator extends ATNSimulator { continue; } const predicateEvaluationResult = pair.pred.evaluate(this.parser, outerContext); - if (this.debug || this.dfa_debug) { + if (ParserATNSimulator.debug || ParserATNSimulator.dfa_debug) { console.log("eval pred " + pair + "=" + predicateEvaluationResult); } if (predicateEvaluationResult) { - if (this.debug || this.dfa_debug) { + if (ParserATNSimulator.debug || ParserATNSimulator.dfa_debug) { console.log("PREDICT " + pair.alt); } predictions.set(pair.alt); @@ -1195,6 +1312,7 @@ export class ParserATNSimulator extends ATNSimulator { } } } + return predictions; } @@ -1204,28 +1322,31 @@ export class ParserATNSimulator extends ATNSimulator { // waste to pursue the closure. Might have to advance when we do // ambig detection though :( // - closure(config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon) { + protected closure(config: ATNConfig, configs: ATNConfigSet, closureBusy: HashSet
+ * {@link addDFAState} for the {@code to} state. * * @param dfa The DFA * @param from_ The source state for the edge * @param t The input symbol * @param to The target state for the edge * - * @return If {@code to} is {@code null}, this method returns {@code null}; - * otherwise this method returns the result of calling {@link //addDFAState} + * @returns If {@code to} is {@code null}, this method returns {@code null}; + * otherwise this method returns the result of calling {@link addDFAState} * on {@code to} */ - addDFAEdge(dfa, from_, t, to) { - if (this.debug) { + protected addDFAEdge(dfa: DFA, from_: DFAState, t: number, to: DFAState): DFAState | null { + if (ParserATNSimulator.debug) { console.log("EDGE " + from_ + " -> " + to + " upon " + this.getTokenName(t)); } if (to === null) { return null; } + to = this.addDFAState(dfa, to); // used existing if possible not incoming if (from_ === null || t < -1 || t > this.atn.maxTokenType) { return to; } + if (from_.edges === null) { - from_.edges = []; + from_.edges = new Array(this.atn.maxTokenType + 2); + from_.edges.fill(null); } + from_.edges[t + 1] = to; // connect - if (this.debug) { - console.log("DFA=\n" + dfa.toString(this.parser != null ? parser.vocabulary : Vocabulary.EMPTY_VOCABULARY)); + if (ParserATNSimulator.debug) { + console.log("DFA=\n" + + dfa.toString(this.parser != null ? this.parser.vocabulary : Vocabulary.EMPTY_VOCABULARY)); } return to; @@ -1661,22 +1754,23 @@ export class ParserATNSimulator extends ATNSimulator { * is already in the DFA, the existing state is returned. Otherwise this * method returns {@code D} after adding it to the DFA. * - *If {@code D} is {@link //ERROR}, this method returns {@link //ERROR} and + *
If {@code D} is {@link ERROR}, this method returns {@link ERROR} and * does not change the DFA.
* * @param dfa The dfa * @param D The DFA state to add - * @return The state stored in the DFA. This will be either the existing + * @returns The state stored in the DFA. This will be either the existing * state if {@code D} is already in the DFA, or {@code D} itself if the * state was not already present */ - addDFAState(dfa, D) { + protected addDFAState(dfa: DFA, D: DFAState): DFAState { if (D === ATNSimulator.ERROR) { return D; } const existing = dfa.states.get(D); if (existing !== null) { - if (this.trace_atn_sim) console.log("addDFAState " + D + " exists"); + if (ParserATNSimulator.trace_atn_sim) { console.log("addDFAState " + D + " exists"); } + return existing; } D.stateNumber = dfa.states.length; @@ -1685,47 +1779,55 @@ export class ParserATNSimulator extends ATNSimulator { D.configs.setReadonly(true); } - if (this.trace_atn_sim) console.log("addDFAState new " + D); + if (ParserATNSimulator.trace_atn_sim) { console.log("addDFAState new " + D); } dfa.states.add(D); - if (this.debug) { + if (ParserATNSimulator.debug) { console.log("adding new DFA state: " + D); } + return D; } - reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex) { - if (this.debug || this.retry_debug) { + protected reportAttemptingFullContext(dfa: DFA, conflictingAlts: BitSet, configs: ATNConfigSet, startIndex: number, + stopIndex: number): void { + if (ParserATNSimulator.debug || ParserATNSimulator.retry_debug) { const interval = new Interval(startIndex, stopIndex + 1); console.log("reportAttemptingFullContext decision=" + dfa.decision + ":" + configs + ", input=" + this.parser.tokenStream.getText(interval)); } + if (this.parser !== null) { - this.parser.getErrorListenerDispatch().reportAttemptingFullContext(this.parser, dfa, startIndex, stopIndex, conflictingAlts, configs); + this.parser.getErrorListenerDispatch().reportAttemptingFullContext(this.parser, dfa, startIndex, stopIndex, + conflictingAlts, configs); } } - reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex) { - if (this.debug || this.retry_debug) { + protected reportContextSensitivity(dfa: DFA, prediction: number, configs: ATNConfigSet, startIndex: number, + stopIndex: number): void { + if (ParserATNSimulator.debug || ParserATNSimulator.retry_debug) { const interval = new Interval(startIndex, stopIndex + 1); console.log("reportContextSensitivity decision=" + dfa.decision + ":" + configs + ", input=" + this.parser.tokenStream.getText(interval)); } + if (this.parser !== null) { - this.parser.getErrorListenerDispatch().reportContextSensitivity(this.parser, dfa, startIndex, stopIndex, prediction, configs); + this.parser.getErrorListenerDispatch().reportContextSensitivity(this.parser, dfa, startIndex, stopIndex, + prediction, configs); } } // If context sensitive parsing, we know it's ambiguity not conflict// - reportAmbiguity(dfa, D, startIndex, stopIndex, - exact, ambigAlts, configs) { - if (this.debug || this.retry_debug) { + protected reportAmbiguity(dfa: DFA, D: DFAState, startIndex: number, stopIndex: number, + exact: boolean, ambigAlts: BitSet | null, configs: ATNConfigSet): void { + if (ParserATNSimulator.debug || ParserATNSimulator.retry_debug) { const interval = new Interval(startIndex, stopIndex + 1); console.log("reportAmbiguity " + ambigAlts + ":" + configs + ", input=" + this.parser.tokenStream.getText(interval)); } if (this.parser !== null) { - this.parser.getErrorListenerDispatch().reportAmbiguity(this.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs); + 
this.parser.getErrorListenerDispatch().reportAmbiguity(this.parser, dfa, startIndex, stopIndex, exact, + ambigAlts, configs); } } } diff --git a/src/atn/PlusBlockStartState.js b/src/atn/PlusBlockStartState.js deleted file mode 100644 index e06c1d4..0000000 --- a/src/atn/PlusBlockStartState.js +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { ATNStateType } from "./ATNStateType.js"; -import { BlockStartState } from "./BlockStartState.js"; - -/** - * Start of {@code (A|B|...)+} loop. Technically a decision state, but - * we don't use for code generation; somebody might need it, so I'm defining - * it for completeness. In reality, the {@link PlusLoopbackState} node is the - * real decision-making note for {@code A+} - */ -export class PlusBlockStartState extends BlockStartState { - get stateType() { - return ATNStateType.PLUS_BLOCK_START; - } - -} diff --git a/src/atn/PlusBlockStartState.d.ts b/src/atn/PlusBlockStartState.ts similarity index 82% rename from src/atn/PlusBlockStartState.d.ts rename to src/atn/PlusBlockStartState.ts index f187dfd..9d7025c 100644 --- a/src/atn/PlusBlockStartState.d.ts +++ b/src/atn/PlusBlockStartState.ts @@ -4,6 +4,7 @@ * can be found in the LICENSE.txt file in the project root. */ +import { ATNStateType } from "./ATNStateType.js"; import { BlockStartState } from "./BlockStartState.js"; import { PlusLoopbackState } from "./PlusLoopbackState.js"; @@ -15,4 +16,9 @@ import { PlusLoopbackState } from "./PlusLoopbackState.js"; */ export class PlusBlockStartState extends BlockStartState { public loopBackState: PlusLoopbackState; + + public override get stateType(): number { + return ATNStateType.PLUS_BLOCK_START; + } + } diff --git a/src/atn/PlusLoopbackState.d.ts b/src/atn/PlusLoopbackState.d.ts deleted file mode 100644 index c7cc4a3..0000000 --- a/src/atn/PlusLoopbackState.d.ts +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { DecisionState } from "./DecisionState.js"; - -/** - * Decision state for {@code A+} and {@code (A|B)+}. It has two transitions: - * one to the loop back to start of the block and one to exit. - */ -export class PlusLoopbackState extends DecisionState { -} diff --git a/src/atn/PlusLoopbackState.js b/src/atn/PlusLoopbackState.ts similarity index 92% rename from src/atn/PlusLoopbackState.js rename to src/atn/PlusLoopbackState.ts index 9d4aab4..564c14a 100644 --- a/src/atn/PlusLoopbackState.js +++ b/src/atn/PlusLoopbackState.ts @@ -12,7 +12,7 @@ import { ATNStateType } from "./ATNStateType.js"; * one to the loop back to start of the block and one to exit. */ export class PlusLoopbackState extends DecisionState { - get stateType() { + public override get stateType(): number { return ATNStateType.PLUS_LOOP_BACK; } diff --git a/src/atn/PrecedencePredicate.d.ts b/src/atn/PrecedencePredicate.d.ts deleted file mode 100644 index 25539f5..0000000 --- a/src/atn/PrecedencePredicate.d.ts +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. 
- */ - -import { Parser } from "../Parser.js"; -import { RuleContext } from "./RuleContext.js"; -import { SemanticContext } from "./SemanticContext.js"; - -export declare class PrecedencePredicate extends SemanticContext { - public precedence: number; - - public constructor(precedence?: number); - - public evaluate(parser: Parser, parserCallStack: RuleContext): boolean; - public evalPrecedence(parser: Parser, parserCallStack: RuleContext): SemanticContext; - - public compareTo(o: PrecedencePredicate): number; - public equals(other: unknown): boolean; - public toString(): string; -} diff --git a/src/atn/PrecedencePredicate.js b/src/atn/PrecedencePredicate.js deleted file mode 100644 index 8ea7423..0000000 --- a/src/atn/PrecedencePredicate.js +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { SemanticContext } from "./SemanticContext.js"; - -export class PrecedencePredicate extends SemanticContext { - - constructor(precedence) { - super(); - this.precedence = precedence === undefined ? 0 : precedence; - } - - evaluate(parser, outerContext) { - return parser.precpred(outerContext, this.precedence); - } - - evalPrecedence(parser, outerContext) { - if (parser.precpred(outerContext, this.precedence)) { - return SemanticContext.NONE; - } else { - return null; - } - } - - compareTo(other) { - return this.precedence - other.precedence; - } - - updateHashCode(hash) { - hash.update(this.precedence); - } - - equals(other) { - if (this === other) { - return true; - } else if (!(other instanceof PrecedencePredicate)) { - return false; - } else { - return this.precedence === other.precedence; - } - } - - toString() { - return "{" + this.precedence + ">=prec}?"; - } - -} - -// HORRIBLE workaround circular import, avoiding dynamic import -SemanticContext.PrecedencePredicate = PrecedencePredicate; diff --git a/src/atn/PrecedencePredicateTransition.d.ts b/src/atn/PrecedencePredicateTransition.d.ts deleted file mode 100644 index 446f0b8..0000000 --- a/src/atn/PrecedencePredicateTransition.d.ts +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { AbstractPredicateTransition } from "../atn/AbstractPredicateTransition.js"; -import { PrecedencePredicate } from "../atn/PrecedencePredicate.js"; -import { ATNState } from "../atn/ATNState.js"; - -export declare class PrecedencePredicateTransition extends AbstractPredicateTransition { - public precedence: number; - - public constructor(target: ATNState, precedence: number); - - public matches(symbol: number, minVocabSymbol: number, maxVocabSymbol: number): boolean; - public getPredicate(): PrecedencePredicate; - public toString(): string; -} diff --git a/src/atn/PrecedencePredicateTransition.js b/src/atn/PrecedencePredicateTransition.js deleted file mode 100644 index e768140..0000000 --- a/src/atn/PrecedencePredicateTransition.js +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. 
- */ - -import { PrecedencePredicate } from "../atn/PrecedencePredicate.js"; -import { AbstractPredicateTransition } from "../atn/AbstractPredicateTransition.js"; -import { TransitionType } from "./TransitionType.js"; - -export class PrecedencePredicateTransition extends AbstractPredicateTransition { - constructor(target, precedence) { - super(target); - this.serializationType = TransitionType.PRECEDENCE; - this.precedence = precedence; - this.isEpsilon = true; - } - - matches(symbol, minVocabSymbol, maxVocabSymbol) { - return false; - } - - getPredicate() { - return new PrecedencePredicate(this.precedence); - } - - toString() { - return this.precedence + " >= _p"; - } -} diff --git a/src/atn/PrecedencePredicateTransition.ts b/src/atn/PrecedencePredicateTransition.ts new file mode 100644 index 0000000..335b09b --- /dev/null +++ b/src/atn/PrecedencePredicateTransition.ts @@ -0,0 +1,39 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +import { SemanticContext } from "./SemanticContext.js"; +import { AbstractPredicateTransition } from "./AbstractPredicateTransition.js"; +import { TransitionType } from "./TransitionType.js"; +import { ATNState } from "./ATNState.js"; + +export class PrecedencePredicateTransition extends AbstractPredicateTransition { + public readonly precedence: number; + + public constructor(target: ATNState, precedence: number) { + super(target); + this.precedence = precedence; + } + + public override get isEpsilon(): boolean { + return true; + } + + public matches(_symbol: number, _minVocabSymbol: number, _maxVocabSymbol: number): boolean { + return false; + } + + public getPredicate(): SemanticContext.PrecedencePredicate { + return new SemanticContext.PrecedencePredicate(this.precedence); + } + + public get serializationType(): number { + return TransitionType.PRECEDENCE; + } + + public override toString(): string { + return this.precedence + " >= _p"; + } +} diff --git a/src/atn/Predicate.d.ts b/src/atn/Predicate.d.ts deleted file mode 100644 index 63c107c..0000000 --- a/src/atn/Predicate.d.ts +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Parser } from "../Parser.js"; -import { RuleContext } from "./RuleContext.js"; -import { SemanticContext } from "./SemanticContext.js"; - -export declare class Predicate extends SemanticContext { - public constructor(ruleIndex: number, predIndex: number, isCtxDependent: boolean); - - public evaluate(parser: Parser, outerContext: RuleContext): boolean; - public equals(obj: unknown): boolean; - public toString(): string; -} diff --git a/src/atn/Predicate.js b/src/atn/Predicate.js deleted file mode 100644 index 8dd946d..0000000 --- a/src/atn/Predicate.js +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { SemanticContext } from "./SemanticContext.js"; - -export class Predicate extends SemanticContext { - - constructor(ruleIndex, predIndex, isCtxDependent) { - super(); - this.ruleIndex = ruleIndex === undefined ? -1 : ruleIndex; - this.predIndex = predIndex === undefined ? 
-1 : predIndex; - this.isCtxDependent = isCtxDependent === undefined ? false : isCtxDependent; // e.g., $i ref in pred - } - - evaluate(parser, outerContext) { - const localctx = this.isCtxDependent ? outerContext : null; - return parser.sempred(localctx, this.ruleIndex, this.predIndex); - } - - updateHashCode(hash) { - hash.update(this.ruleIndex, this.predIndex, this.isCtxDependent); - } - - equals(other) { - if (this === other) { - return true; - } else if (!(other instanceof Predicate)) { - return false; - } else { - return this.ruleIndex === other.ruleIndex && - this.predIndex === other.predIndex && - this.isCtxDependent === other.isCtxDependent; - } - } - - toString() { - return "{" + this.ruleIndex + ":" + this.predIndex + "}?"; - } -} - -/** - * The default {@link SemanticContext}, which is semantically equivalent to - * a predicate of the form {@code {true}?} - */ -SemanticContext.NONE = new Predicate(); diff --git a/src/atn/PredicateTransition.d.ts b/src/atn/PredicateTransition.d.ts deleted file mode 100644 index 5b37831..0000000 --- a/src/atn/PredicateTransition.d.ts +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { AbstractPredicateTransition } from "../atn/AbstractPredicateTransition.js"; -import { Predicate } from "../atn/Predicate.js"; -import { ATNState } from "../atn/ATNState.js"; - -export declare class PredicateTransition extends AbstractPredicateTransition { - public ruleIndex: number; - public predIndex: number; - public isCtxDependent: boolean; - public isEpsilon: boolean; - - public constructor(target: ATNState, ruleIndex: number, predIndex: number, isCtxDependent: boolean); - - public matches(symbol: number, minVocabSymbol: number, maxVocabSymbol: number): boolean; - public getPredicate(): Predicate; - public toString(): string; -} diff --git a/src/atn/PredicateTransition.js b/src/atn/PredicateTransition.js deleted file mode 100644 index 3420846..0000000 --- a/src/atn/PredicateTransition.js +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -import { Predicate } from "../atn/Predicate.js"; -import { AbstractPredicateTransition } from "../atn/AbstractPredicateTransition.js"; -import { TransitionType } from "./TransitionType.js"; - -export class PredicateTransition extends AbstractPredicateTransition { - constructor(target, ruleIndex, predIndex, isCtxDependent) { - super(target); - this.serializationType = TransitionType.PREDICATE; - this.ruleIndex = ruleIndex; - this.predIndex = predIndex; - this.isCtxDependent = isCtxDependent; // e.g., $i ref in pred - this.isEpsilon = true; - } - - matches(symbol, minVocabSymbol, maxVocabSymbol) { - return false; - } - - getPredicate() { - return new Predicate(this.ruleIndex, this.predIndex, this.isCtxDependent); - } - - toString() { - return "pred_" + this.ruleIndex + ":" + this.predIndex; - } -} diff --git a/src/atn/PredicateTransition.ts b/src/atn/PredicateTransition.ts new file mode 100644 index 0000000..68fe4ce --- /dev/null +++ b/src/atn/PredicateTransition.ts @@ -0,0 +1,43 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. 
+ */ + +import { SemanticContext } from "./SemanticContext.js"; +import { AbstractPredicateTransition } from "./AbstractPredicateTransition.js"; +import { TransitionType } from "./TransitionType.js"; +import { ATNState } from "./ATNState.js"; + +export class PredicateTransition extends AbstractPredicateTransition { + public readonly ruleIndex: number; + public readonly predIndex: number; + public readonly isCtxDependent: boolean; // e.g., $i ref in pred + + public constructor(target: ATNState, ruleIndex: number, predIndex: number, isCtxDependent: boolean) { + super(target); + this.ruleIndex = ruleIndex; + this.predIndex = predIndex; + this.isCtxDependent = isCtxDependent; // e.g., $i ref in pred + } + + public override get isEpsilon(): boolean { + return true; + } + + public matches(_symbol: number, _minVocabSymbol: number, _maxVocabSymbol: number): boolean { + return false; + } + + public get serializationType(): number { + return TransitionType.PREDICATE; + } + + public getPredicate(): SemanticContext.Predicate { + return new SemanticContext.Predicate(this.ruleIndex, this.predIndex, this.isCtxDependent); + } + + public override toString(): string { + return "pred_" + this.ruleIndex + ":" + this.predIndex; + } +} diff --git a/src/atn/PredictionContext.js b/src/atn/PredictionContext.js deleted file mode 100644 index 91d5a4f..0000000 --- a/src/atn/PredictionContext.js +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) The ANTLR Project. All rights reserved. - * Use of this file is governed by the BSD 3-clause license that - * can be found in the LICENSE.txt file in the project root. - */ - -export class PredictionContext { - constructor(cachedHashCode) { - this.cachedHashCode = cachedHashCode; - } - - /** - * Stores the computed hash code of this {@link PredictionContext}. The hash - * code is computed in parts to match the following reference algorithm. - * - *- * private int referenceHashCode() { - * int hash = {@link MurmurHash//initialize MurmurHash.initialize}({@link - * //INITIAL_HASH}); - * - * for (int i = 0; i < {@link //size()}; i++) { - * hash = {@link MurmurHash//update MurmurHash.update}(hash, {@link //getParent - * getParent}(i)); - * } - * - * for (int i = 0; i < {@link //size()}; i++) { - * hash = {@link MurmurHash//update MurmurHash.update}(hash, {@link - * //getReturnState getReturnState}(i)); - * } - * - * hash = {@link MurmurHash//finish MurmurHash.finish}(hash, 2// {@link - * //size()}); - * return hash; - * } - *- * This means only the {@link //EMPTY} context is in set. - */ - isEmpty() { - return this === PredictionContext.EMPTY; - } - - hasEmptyPath() { - return this.getReturnState(this.length - 1) === PredictionContext.EMPTY_RETURN_STATE; - } - - hashCode() { - return this.cachedHashCode; - } - - updateHashCode(hash) { - hash.update(this.cachedHashCode); - } -} - -/** - * Represents {@code $} in local context prediction, which means wildcard. - * {@code//+x =//}. - */ -PredictionContext.EMPTY = null; - -/** - * Represents {@code $} in an array in full context mode, when {@code $} - * doesn't mean wildcard: {@code $ + x = [$,x]}. Here, - * {@code $} = {@link //EMPTY_RETURN_STATE}. 
- */ -PredictionContext.EMPTY_RETURN_STATE = 0x7FFFFFFF; - -PredictionContext.globalNodeCount = 1; -PredictionContext.id = PredictionContext.globalNodeCount; -PredictionContext.trace_atn_sim = false; diff --git a/src/atn/PredictionContext.ts b/src/atn/PredictionContext.ts new file mode 100644 index 0000000..b1a155a --- /dev/null +++ b/src/atn/PredictionContext.ts @@ -0,0 +1,58 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/* eslint-disable @typescript-eslint/naming-convention, jsdoc/require-returns, jsdoc/require-param */ + +import { Recognizer } from "../Recognizer.js"; +import { HashCode } from "../misc/HashCode.js"; +import { ATNSimulator } from "./ATNSimulator.js"; + +// Most of the implementation is located in PredictionContextUtils.ts, to avoid circular dependencies. + +export abstract class PredictionContext { + /** + * Represents {@code $} in an array in full context mode, when {@code $} + * doesn't mean wildcard: {@code $ + x = [$,x]}. Here, + * {@code $} = {@link EMPTY_RETURN_STATE}. + */ + public static readonly EMPTY_RETURN_STATE = 0x7FFFFFFF; + + // TODO: Temporarily here. Should be moved to EmptyPredictionContext. It's initialized in that context class. + public static EMPTY: PredictionContext; + + public static trace_atn_sim = false; + + private cachedHashCode: number; + + public constructor(cachedHashCode: number) { + this.cachedHashCode = cachedHashCode; + } + + public isEmpty(): boolean { + return false; + } + + public hasEmptyPath(): boolean { + return this.getReturnState(this.length - 1) === PredictionContext.EMPTY_RETURN_STATE; + } + + public hashCode(): number { + return this.cachedHashCode; + } + + public updateHashCode(hash: HashCode): void { + hash.update(this.cachedHashCode); + } + + public toString(_recog?: Recognizer
These local-context merge operations are used when {@code rootIsWildcard} * is true.
* - *{@link //EMPTY} is superset of any graph; return {@link //EMPTY}.
+ *
{@link EMPTY} is superset of any graph; return {@link EMPTY}.
*
{@link //EMPTY} and anything is {@code //EMPTY}, so merged parent is + *
{@link EMPTY} and anything is {@code EMPTY}, so merged parent is
* {@code //EMPTY}; return left graph.
*
Must keep all contexts; {@link //EMPTY} in array is a special value (and + *
Must keep all contexts; {@link EMPTY} in array is a special value (and
* null parent).
*
- * When using this prediction mode, the parser will either return a correct - * parse tree (i.e. the same parse tree that would be returned with the - * {@link //LL} prediction mode), or it will report a syntax error. If a - * syntax error is encountered when using the {@link //SLL} prediction mode, - * it may be due to either an actual syntax error in the input or indicate - * that the particular combination of grammar and input requires the more - * powerful {@link //LL} prediction abilities to complete successfully.
- * - *- * This prediction mode does not provide any guarantees for prediction - * behavior for syntactically-incorrect inputs.
- */ - SLL: 0, - - /** - * The LL(*) prediction mode. This prediction mode allows the current parser - * context to be used for resolving SLL conflicts that occur during - * prediction. This is the fastest prediction mode that guarantees correct - * parse results for all combinations of grammars with syntactically correct - * inputs. - * - *- * When using this prediction mode, the parser will make correct decisions - * for all syntactically-correct grammar and input combinations. However, in - * cases where the grammar is truly ambiguous this prediction mode might not - * report a precise answer for exactly which alternatives are - * ambiguous.
- * - *- * This prediction mode does not provide any guarantees for prediction - * behavior for syntactically-incorrect inputs.
- */ - LL: 1, - - /** - * - * The LL(*) prediction mode with exact ambiguity detection. In addition to - * the correctness guarantees provided by the {@link //LL} prediction mode, - * this prediction mode instructs the prediction algorithm to determine the - * complete and exact set of ambiguous alternatives for every ambiguous - * decision encountered while parsing. - * - *- * This prediction mode may be used for diagnosing ambiguities during - * grammar development. Due to the performance overhead of calculating sets - * of ambiguous alternatives, this prediction mode should be avoided when - * the exact results are not necessary.
- * - *- * This prediction mode does not provide any guarantees for prediction - * behavior for syntactically-incorrect inputs.
- */ - LL_EXACT_AMBIG_DETECTION: 2, - - /** - * - * Computes the SLL prediction termination condition. - * - *- * This method computes the SLL prediction termination condition for both of - * the following cases.
- * - *COMBINED SLL+LL PARSING
- * - *When LL-fallback is enabled upon SLL conflict, correct predictions are - * ensured regardless of how the termination condition is computed by this - * method. Due to the substantially higher cost of LL prediction, the - * prediction should only fall back to LL when the additional lookahead - * cannot lead to a unique SLL prediction.
- * - *Assuming combined SLL+LL parsing, an SLL configuration set with only - * conflicting subsets should fall back to full LL, even if the - * configuration sets don't resolve to the same alternative (e.g. - * {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting - * configuration, SLL could continue with the hopes that more lookahead will - * resolve via one of those non-conflicting configurations.
- * - *Here's the prediction termination rule them: SLL (for SLL+LL parsing) - * stops when it sees only conflicting configuration subsets. In contrast, - * full LL keeps going when there is uncertainty.
- * - *HEURISTIC
- * - *As a heuristic, we stop prediction when we see any conflicting subset - * unless we see a state that only has one alternative associated with it. - * The single-alt-state thing lets prediction continue upon rules like - * (otherwise, it would admit defeat too soon):
- * - *{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}
- * - *When the ATN simulation reaches the state before {@code ';'}, it has a - * DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally - * {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop - * processing this node because alternative to has another way to continue, - * via {@code [6|2|[]]}.
- * - *It also let's us continue for this rule:
- * - *{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}
- * - *After matching input A, we reach the stop state for rule A, state 1. - * State 8 is the state right before B. Clearly alternatives 1 and 2 - * conflict and no amount of further lookahead will separate the two. - * However, alternative 3 will be able to continue and so we do not stop - * working on this state. In the previous example, we're concerned with - * states associated with the conflicting alternatives. Here alt 3 is not - * associated with the conflicting configs, but since we can continue - * looking for input reasonably, don't declare the state done.
- * - *PURE SLL PARSING
- * - *To handle pure SLL parsing, all we have to do is make sure that we - * combine stack contexts for configurations that differ only by semantic - * predicate. From there, we can do the usual SLL termination heuristic.
- * - *PREDICATES IN SLL+LL PARSING
- * - *SLL decisions don't evaluate predicates until after they reach DFA stop - * states because they need to create the DFA cache that works in all - * semantic situations. In contrast, full LL evaluates predicates collected - * during start state computation so it can ignore predicates thereafter. - * This means that SLL termination detection can totally ignore semantic - * predicates.
- * - *Implementation-wise, {@link ATNConfigSet} combines stack contexts but not - * semantic predicate contexts so we might see two configurations like the - * following.
- * - *{@code (s, 1, x, {}), (s, 1, x', {p})}
- * - *Before testing these configurations against others, we have to merge - * {@code x} and {@code x'} (without modifying the existing configurations). - * For example, we test {@code (x+x')==x''} when looking for conflicts in - * the following configurations.
- * - *{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}
- * - *If the configuration set has predicates (as indicated by - * {@link ATNConfigSet//hasSemanticContext}), this algorithm makes a copy of - * the configurations to strip out all of the predicates so that a standard - * {@link ATNConfigSet} will merge everything ignoring predicates.
- */ - hasSLLConflictTerminatingPrediction: function (mode, configs) { - // Configs in rule stop states indicate reaching the end of the decision - // rule (local context) or end of start rule (full context). If all - // configs meet this condition, then none of the configurations is able - // to match additional input so we terminate prediction. - // - if (PredictionMode.allConfigsInRuleStopStates(configs)) { - return true; - } - // pure SLL mode parsing - if (mode === PredictionMode.SLL) { - // Don't bother with combining configs from different semantic - // contexts if we can fail over to full LL; costs more time - // since we'll often fail over anyway. - if (configs.hasSemanticContext) { - // dup configs, tossing out semantic predicates - const dup = new ATNConfigSet(); - for (let i = 0; i < configs.items.length; i++) { - let c = configs.items[i]; - c = new ATNConfig({ semanticContext: SemanticContext.NONE }, c); - dup.add(c); - } - configs = dup; - } - // now we have combined contexts for configs with dissimilar preds - } - // pure SLL or combined SLL+LL mode parsing - const altsets = PredictionMode.getConflictingAltSubsets(configs); - return PredictionMode.hasConflictingAltSet(altsets) && !PredictionMode.hasStateAssociatedWithOneAlt(configs); - }, - - /** - * Checks if any configuration in {@code configs} is in a - * {@link RuleStopState}. Configurations meeting this condition have reached - * the end of the decision rule (local context) or end of start rule (full - * context). - * - * @param configs the configuration set to test - * @return {@code true} if any configuration in {@code configs} is in a - * {@link RuleStopState}, otherwise {@code false} - */ - hasConfigInRuleStopState: function (configs) { - for (let i = 0; i < configs.items.length; i++) { - const c = configs.items[i]; - if (c.state instanceof RuleStopState) { - return true; - } - } - return false; - }, - - /** - * Checks if all configurations in {@code configs} are in a - * {@link RuleStopState}. Configurations meeting this condition have reached - * the end of the decision rule (local context) or end of start rule (full - * context). - * - * @param configs the configuration set to test - * @return {@code true} if all configurations in {@code configs} are in a - * {@link RuleStopState}, otherwise {@code false} - */ - allConfigsInRuleStopStates: function (configs) { - for (let i = 0; i < configs.items.length; i++) { - const c = configs.items[i]; - if (!(c.state instanceof RuleStopState)) { - return false; - } - } - return true; - }, - - /** - * - * Full LL prediction termination. - * - *Can we stop looking ahead during ATN simulation or is there some - * uncertainty as to which alternative we will ultimately pick, after - * consuming more input? Even if there are partial conflicts, we might know - * that everything is going to resolve to the same minimum alternative. That - * means we can stop since no more lookahead will change that fact. On the - * other hand, there might be multiple conflicts that resolve to different - * minimums. That means we need more look ahead to decide which of those - * alternatives we should predict.
- * - *The basic idea is to split the set of configurations {@code C}, into - * conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with - * non-conflicting configurations. Two configurations conflict if they have - * identical {@link ATNConfig//state} and {@link ATNConfig//context} values - * but different {@link ATNConfig//alt} value, e.g. {@code (s, i, ctx, _)} - * and {@code (s, j, ctx, _)} for {@code i!=j}.
- * - *Reduce these configuration subsets to the set of possible alternatives. - * You can compute the alternative subsets in one pass as follows:
- * - *{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in - * {@code C} holding {@code s} and {@code ctx} fixed.
- * - *Or in pseudo-code, for each configuration {@code c} in {@code C}:
- * - *- * map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not - * alt and not pred - *- * - *
The values in {@code map} are the set of {@code A_s,ctx} sets.
- * - *If {@code |A_s,ctx|=1} then there is no conflict associated with - * {@code s} and {@code ctx}.
- * - *Reduce the subsets to singletons by choosing a minimum of each subset. If - * the union of these alternative subsets is a singleton, then no amount of - * more lookahead will help us. We will always pick that alternative. If, - * however, there is more than one alternative, then we are uncertain which - * alternative to predict and must continue looking for resolution. We may - * or may not discover an ambiguity in the future, even if there are no - * conflicting subsets this round.
- * - *The biggest sin is to terminate early because it means we've made a - * decision but were uncertain as to the eventual outcome. We haven't used - * enough lookahead. On the other hand, announcing a conflict too late is no - * big deal; you will still have the conflict. It's just inefficient. It - * might even look until the end of file.
- * - *No special consideration for semantic predicates is required because - * predicates are evaluated on-the-fly for full LL prediction, ensuring that - * no configuration contains a semantic context during the termination - * check.
- * - *CONFLICTING CONFIGS
- * - *Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict - * when {@code i!=j} but {@code x=x'}. Because we merge all - * {@code (s, i, _)} configurations together, that means that there are at - * most {@code n} configurations associated with state {@code s} for - * {@code n} possible alternatives in the decision. The merged stacks - * complicate the comparison of configuration contexts {@code x} and - * {@code x'}. Sam checks to see if one is a subset of the other by calling - * merge and checking to see if the merged result is either {@code x} or - * {@code x'}. If the {@code x} associated with lowest alternative {@code i} - * is the superset, then {@code i} is the only possible prediction since the - * others resolve to {@code min(i)} as well. However, if {@code x} is - * associated with {@code j>i} then at least one stack configuration for - * {@code j} is not in conflict with alternative {@code i}. The algorithm - * should keep going, looking for more lookahead due to the uncertainty.
- * - *For simplicity, I'm doing a equality check between {@code x} and - * {@code x'} that lets the algorithm continue to consume lookahead longer - * than necessary. The reason I like the equality is of course the - * simplicity but also because that is the test you need to detect the - * alternatives that are actually in conflict.
- * - *CONTINUE/STOP RULE
- * - *Continue if union of resolved alternative sets from non-conflicting and - * conflicting alternative subsets has more than one alternative. We are - * uncertain about which alternative to predict.
- * - *The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which - * alternatives are still in the running for the amount of input we've - * consumed at this point. The conflicting sets let us to strip away - * configurations that won't lead to more states because we resolve - * conflicts to the configuration with a minimum alternate for the - * conflicting set.
- * - *CASES
- * - *EXACT AMBIGUITY DETECTION
- * - *If all states report the same conflicting set of alternatives, then we - * know we have the exact ambiguity set.
- * - *|A_i|>1
and
- * A_i = A_j
for all i, j.
In other words, we continue examining lookahead until all {@code A_i} - * have more than one alternative and all {@code A_i} are the same. If - * {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate - * because the resolved set is {@code {1}}. To determine what the real - * ambiguity is, we have to know whether the ambiguity is between one and - * two or one and three so we keep going. We can only stop prediction when - * we need exact ambiguity detection when the sets look like - * {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
- */ - resolvesToJustOneViableAlt: function (altsets) { - return PredictionMode.getSingleViableAlt(altsets); - }, - - /** - * Determines if every alternative subset in {@code altsets} contains more - * than one alternative. - * - * @param altsets a collection of alternative subsets - * @return {@code true} if every {@link BitSet} in {@code altsets} has - * {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} - */ - allSubsetsConflict: function (altsets) { - return !PredictionMode.hasNonConflictingAltSet(altsets); - }, - /** - * Determines if any single alternative subset in {@code altsets} contains - * exactly one alternative. - * - * @param altsets a collection of alternative subsets - * @return {@code true} if {@code altsets} contains a {@link BitSet} with - * {@link BitSet//cardinality cardinality} 1, otherwise {@code false} - */ - hasNonConflictingAltSet: function (altsets) { - for (let i = 0; i < altsets.length; i++) { - const alts = altsets[i]; - if (alts.length === 1) { - return true; - } - } - return false; - }, - - - /** - * Determines if any single alternative subset in {@code altsets} contains - * more than one alternative. - * - * @param altsets a collection of alternative subsets - * @return {@code true} if {@code altsets} contains a {@link BitSet} with - * {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} - */ - hasConflictingAltSet: function (altsets) { - for (let i = 0; i < altsets.length; i++) { - const alts = altsets[i]; - if (alts.length > 1) { - return true; - } - } - return false; - }, - - - /** - * Determines if every alternative subset in {@code altsets} is equivalent. - * - * @param altsets a collection of alternative subsets - * @return {@code true} if every member of {@code altsets} is equal to the - * others, otherwise {@code false} - */ - allSubsetsEqual: function (altsets) { - let first = null; - for (let i = 0; i < altsets.length; i++) { - const alts = altsets[i]; - if (first === null) { - first = alts; - } else if (alts !== first) { - return false; - } - } - return true; - }, - - - /** - * Returns the unique alternative predicted by all alternative subsets in - * {@code altsets}. If no such alternative exists, this method returns - * {@link ATN//INVALID_ALT_NUMBER}. - * - * @param altsets a collection of alternative subsets - */ - getUniqueAlt: function (altsets) { - const all = PredictionMode.getAlts(altsets); - if (all.length === 1) { - return all.nextSetBit(0); - } else { - return ATN.INVALID_ALT_NUMBER; - } - }, - - /** - * Gets the complete set of represented alternatives for a collection of - * alternative subsets. This method returns the union of each {@link BitSet} - * in {@code altsets}. - * - * @param altsets a collection of alternative subsets - * @return the set of represented alternatives in {@code altsets} - */ - getAlts: function (altsets) { - const all = new BitSet(); - altsets.map(function (alts) { all.or(alts); }); - return all; - }, - - /** - * This function gets the conflicting alt subsets from a configuration set. 
- * For each configuration {@code c} in {@code configs}: - * - *- * map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not - * alt and not pred - *- */ - getConflictingAltSubsets: function (configs) { - const configToAlts = new HashMap(); - configToAlts.hashFunction = function (cfg) { HashCode.hashStuff(cfg.state.stateNumber, cfg.context); }; - configToAlts.equalsFunction = function (c1, c2) { return c1.state.stateNumber === c2.state.stateNumber && c1.context.equals(c2.context); }; - configs.items.map(function (cfg) { - let alts = configToAlts.get(cfg); - if (alts === null) { - alts = new BitSet(); - configToAlts.set(cfg, alts); - } - alts.set(cfg.alt); - }); - return configToAlts.getValues(); - }, - - /** - * Get a map from state to alt subset from a configuration set. For each - * configuration {@code c} in {@code configs}: - * - *
- * map[c.{@link ATNConfig//state state}] U= c.{@link ATNConfig//alt alt} - *- */ - getStateToAltMap: function (configs) { - const m = new AltDict(); - configs.items.map(function (c) { - let alts = m.get(c.state); - if (alts === null) { - alts = new BitSet(); - m.set(c.state, alts); - } - alts.set(c.alt); - }); - return m; - }, - - hasStateAssociatedWithOneAlt: function (configs) { - const values = PredictionMode.getStateToAltMap(configs).values(); - for (let i = 0; i < values.length; i++) { - if (values[i].length === 1) { - return true; - } - } - return false; - }, - - getSingleViableAlt: function (altsets) { - let result = null; - for (let i = 0; i < altsets.length; i++) { - const alts = altsets[i]; - const minAlt = alts.nextSetBit(0); - if (result === null) { - result = minAlt; - } else if (result !== minAlt) { // more than 1 viable alt - return ATN.INVALID_ALT_NUMBER; - } - } - return result; - } -}; diff --git a/src/atn/PredictionMode.ts b/src/atn/PredictionMode.ts new file mode 100644 index 0000000..42ad410 --- /dev/null +++ b/src/atn/PredictionMode.ts @@ -0,0 +1,577 @@ +/* + * Copyright (c) The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +/* eslint-disable @typescript-eslint/naming-convention, jsdoc/require-returns, jsdoc/require-param */ + +import { ATN } from "./ATN.js"; +import { RuleStopState } from "./RuleStopState.js"; +import { ATNConfigSet } from "./ATNConfigSet.js"; +import { ATNConfig } from "./ATNConfig.js"; +import { SemanticContext } from "./SemanticContext.js"; +import { BitSet } from "../misc/BitSet.js"; +import { HashCode } from "../misc/HashCode.js"; +import { HashMap } from "../misc/HashMap.js"; +import { ATNState } from "./ATNState.js"; + +/** + * This enumeration defines the prediction modes available in ANTLR 4 along with + * utility methods for analyzing configuration sets for conflicts and/or + * ambiguities. + */ +export class PredictionMode { + /** + * The SLL(*) prediction mode. This prediction mode ignores the current + * parser context when making predictions. This is the fastest prediction + * mode, and provides correct results for many grammars. This prediction + * mode is more powerful than the prediction mode provided by ANTLR 3, but + * may result in syntax errors for grammar and input combinations which are + * not SLL. + * + *
+ * When using this prediction mode, the parser will either return a correct + * parse tree (i.e. the same parse tree that would be returned with the + * {@link LL} prediction mode), or it will report a syntax error. If a + * syntax error is encountered when using the {@link SLL} prediction mode, + * it may be due to either an actual syntax error in the input or indicate + * that the particular combination of grammar and input requires the more + * powerful {@link LL} prediction abilities to complete successfully.
+ * + *+ * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.
+ */ + public static readonly SLL: number = 0; + + /** + * The LL(*) prediction mode. This prediction mode allows the current parser + * context to be used for resolving SLL conflicts that occur during + * prediction. This is the fastest prediction mode that guarantees correct + * parse results for all combinations of grammars with syntactically correct + * inputs. + * + *+ * When using this prediction mode, the parser will make correct decisions + * for all syntactically-correct grammar and input combinations. However, in + * cases where the grammar is truly ambiguous this prediction mode might not + * report a precise answer for exactly which alternatives are + * ambiguous.
+ * + *+ * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.
+ */ + public static readonly LL: number = 1; + + /** + * + * The LL(*) prediction mode with exact ambiguity detection. In addition to + * the correctness guarantees provided by the {@link LL} prediction mode, + * this prediction mode instructs the prediction algorithm to determine the + * complete and exact set of ambiguous alternatives for every ambiguous + * decision encountered while parsing. + * + *+ * This prediction mode may be used for diagnosing ambiguities during + * grammar development. Due to the performance overhead of calculating sets + * of ambiguous alternatives, this prediction mode should be avoided when + * the exact results are not necessary.
+ * + *+ * This prediction mode does not provide any guarantees for prediction + * behavior for syntactically-incorrect inputs.
+ */ + public static readonly LL_EXACT_AMBIG_DETECTION: number = 2; + + /** + * + *Computes the SLL prediction termination condition. + * + *+ *This method computes the SLL prediction termination condition for both of + *the following cases.
+ * + *COMBINED SLL+LL PARSING
+ * + *When LL-fallback is enabled upon SLL conflict, correct predictions are + *ensured regardless of how the termination condition is computed by this + *method. Due to the substantially higher cost of LL prediction, the + *prediction should only fall back to LL when the additional lookahead + *cannot lead to a unique SLL prediction.
+ * + *Assuming combined SLL+LL parsing, an SLL configuration set with only + *conflicting subsets should fall back to full LL, even if the + *configuration sets don't resolve to the same alternative (e.g. + *{@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting + *configuration, SLL could continue with the hopes that more lookahead will + *resolve via one of those non-conflicting configurations.
+ * + *Here's the prediction termination rule them: SLL (for SLL+LL parsing) + *stops when it sees only conflicting configuration subsets. In contrast, + *full LL keeps going when there is uncertainty.
+ * + *HEURISTIC
+ * + *As a heuristic, we stop prediction when we see any conflicting subset + *unless we see a state that only has one alternative associated with it. + *The single-alt-state thing lets prediction continue upon rules like + *(otherwise, it would admit defeat too soon):
+ * + *{@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;}
+ * + *When the ATN simulation reaches the state before {@code ';'}, it has a + *DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally + *{@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop + *processing this node because alternative to has another way to continue, + *via {@code [6|2|[]]}.
+ * + *It also let's us continue for this rule:
+ * + *{@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;}
+ * + *After matching input A, we reach the stop state for rule A, state 1. + *State 8 is the state right before B. Clearly alternatives 1 and 2 + *conflict and no amount of further lookahead will separate the two. + *However, alternative 3 will be able to continue and so we do not stop + *working on this state. In the previous example, we're concerned with + *states associated with the conflicting alternatives. Here alt 3 is not + *associated with the conflicting configs, but since we can continue + *looking for input reasonably, don't declare the state done.
+ * + *PURE SLL PARSING
+ * + *To handle pure SLL parsing, all we have to do is make sure that we + *combine stack contexts for configurations that differ only by semantic + *predicate. From there, we can do the usual SLL termination heuristic.
+ * + *PREDICATES IN SLL+LL PARSING
+ * + *SLL decisions don't evaluate predicates until after they reach DFA stop + *states because they need to create the DFA cache that works in all + *semantic situations. In contrast, full LL evaluates predicates collected + *during start state computation so it can ignore predicates thereafter. + *This means that SLL termination detection can totally ignore semantic + *predicates.
+ * + *Implementation-wise, {@link ATNConfigSet} combines stack contexts but not + *semantic predicate contexts so we might see two configurations like the + *following.
+ * + *{@code (s, 1, x, {}), (s, 1, x', {p})}
+ * + *Before testing these configurations against others, we have to merge + *{@code x} and {@code x'} (without modifying the existing configurations). + *For example, we test {@code (x+x')==x''} when looking for conflicts in + *the following configurations.
+ * + *{@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})}
+ * + *If the configuration set has predicates (as indicated by + *{@link ATNConfigSet//hasSemanticContext}), this algorithm makes a copy of + *the configurations to strip out all of the predicates so that a standard + *{@link ATNConfigSet} will merge everything ignoring predicates.
+ */ + public static hasSLLConflictTerminatingPrediction(mode: number, configs: ATNConfigSet): boolean { + // Configs in rule stop states indicate reaching the end of the decision + // rule (local context) or end of start rule (full context). If all + // configs meet this condition, then none of the configurations is able + // to match additional input so we terminate prediction. + // + if (PredictionMode.allConfigsInRuleStopStates(configs)) { + return true; + } + // pure SLL mode parsing + if (mode === PredictionMode.SLL) { + // Don't bother with combining configs from different semantic + // contexts if we can fail over to full LL; costs more time + // since we'll often fail over anyway. + if (configs.hasSemanticContext) { + // dup configs, tossing out semantic predicates + const dup = new ATNConfigSet(); + for (let c of configs.items) { + c = new ATNConfig({ semanticContext: SemanticContext.NONE }, c); + dup.add(c); + } + configs = dup; + } + // now we have combined contexts for configs with dissimilar preds + } + // pure SLL or combined SLL+LL mode parsing + const altSets = PredictionMode.getConflictingAltSubsets(configs); + + return PredictionMode.hasConflictingAltSet(altSets) && !PredictionMode.hasStateAssociatedWithOneAlt(configs); + }; + + /** + * Checks if any configuration in {@code configs} is in a + * {@link RuleStopState}. Configurations meeting this condition have reached + * the end of the decision rule (local context) or end of start rule (full + * context). + * + * @param configs the configuration set to test + * @returns `true` if any configuration in {@code configs} is in a + * {@link RuleStopState}, otherwise {@code false} + */ + public static hasConfigInRuleStopState(configs: ATNConfigSet): boolean { + for (const c of configs.items) { + if (c.state instanceof RuleStopState) { + return true; + } + } + + return false; + }; + + /** + * Checks if all configurations in {@code configs} are in a + * {@link RuleStopState}. Configurations meeting this condition have reached + * the end of the decision rule (local context) or end of start rule (full + * context). + * + * @param configs the configuration set to test + * @returns `true` if all configurations in {@code configs} are in a + * {@link RuleStopState}, otherwise {@code false} + */ + public static allConfigsInRuleStopStates(configs: ATNConfigSet): boolean { + for (const c of configs.items) { + if (!(c.state instanceof RuleStopState)) { + return false; + } + } + + return true; + }; + + /** + * + *Full LL prediction termination. + * + *Can we stop looking ahead during ATN simulation or is there some + *uncertainty as to which alternative we will ultimately pick, after + *consuming more input? Even if there are partial conflicts, we might know + *that everything is going to resolve to the same minimum alternative. That + *means we can stop since no more lookahead will change that fact. On the + *other hand, there might be multiple conflicts that resolve to different + *minimums. That means we need more look ahead to decide which of those + *alternatives we should predict.
+ * + *The basic idea is to split the set of configurations {@code C}, into + *conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with + *non-conflicting configurations. Two configurations conflict if they have + *identical {@link ATNConfig//state} and {@link ATNConfig//context} values + *but different {@link ATNConfig//alt} value, e.g. {@code (s, i, ctx, _)} + *and {@code (s, j, ctx, _)} for {@code i!=j}.
+ * + *Reduce these configuration subsets to the set of possible alternatives. + *You can compute the alternative subsets in one pass as follows:
+ * + *{@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in + *{@code C} holding {@code s} and {@code ctx} fixed.
+ * + *Or in pseudo-code, for each configuration {@code c} in {@code C}:
+ * + *+ *map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not + *alt and not pred + *+ * + *
The values in {@code map} are the set of {@code A_s,ctx} sets.
+ * + *If {@code |A_s,ctx|=1} then there is no conflict associated with + *{@code s} and {@code ctx}.
+ * + *Reduce the subsets to singletons by choosing a minimum of each subset. If + *the union of these alternative subsets is a singleton, then no amount of + *more lookahead will help us. We will always pick that alternative. If, + *however, there is more than one alternative, then we are uncertain which + *alternative to predict and must continue looking for resolution. We may + *or may not discover an ambiguity in the future, even if there are no + *conflicting subsets this round.
+ * + *The biggest sin is to terminate early because it means we've made a + *decision but were uncertain as to the eventual outcome. We haven't used + *enough lookahead. On the other hand, announcing a conflict too late is no + *big deal; you will still have the conflict. It's just inefficient. It + *might even look until the end of file.
+ * + *No special consideration for semantic predicates is required because + *predicates are evaluated on-the-fly for full LL prediction, ensuring that + *no configuration contains a semantic context during the termination + *check.
+ * + *CONFLICTING CONFIGS
+ * + *Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict + *when {@code i!=j} but {@code x=x'}. Because we merge all + *{@code (s, i, _)} configurations together, that means that there are at + *most {@code n} configurations associated with state {@code s} for + *{@code n} possible alternatives in the decision. The merged stacks + *complicate the comparison of configuration contexts {@code x} and + *{@code x'}. Sam checks to see if one is a subset of the other by calling + *merge and checking to see if the merged result is either {@code x} or + *{@code x'}. If the {@code x} associated with lowest alternative {@code i} + *is the superset, then {@code i} is the only possible prediction since the + *others resolve to {@code min(i)} as well. However, if {@code x} is + *associated with {@code j>i} then at least one stack configuration for + *{@code j} is not in conflict with alternative {@code i}. The algorithm + *should keep going, looking for more lookahead due to the uncertainty.
+ * + *For simplicity, I'm doing a equality check between {@code x} and + *{@code x'} that lets the algorithm continue to consume lookahead longer + *than necessary. The reason I like the equality is of course the + *simplicity but also because that is the test you need to detect the + *alternatives that are actually in conflict.
+ * + *CONTINUE/STOP RULE
+ * + *Continue if union of resolved alternative sets from non-conflicting and + *conflicting alternative subsets has more than one alternative. We are + *uncertain about which alternative to predict.
+ * + *The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which + *alternatives are still in the running for the amount of input we've + *consumed at this point. The conflicting sets let us to strip away + *configurations that won't lead to more states because we resolve + *conflicts to the configuration with a minimum alternate for the + *conflicting set.
+ * + *CASES
+ * + *EXACT AMBIGUITY DETECTION
+ * + *If all states report the same conflicting set of alternatives, then we + *know we have the exact ambiguity set.
+ * + *|A_i|>1
and
+ *A_i = A_j
for all i, j.
In other words, we continue examining lookahead until all {@code A_i} + *have more than one alternative and all {@code A_i} are the same. If + *{@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate + *because the resolved set is {@code {1}}. To determine what the real + *ambiguity is, we have to know whether the ambiguity is between one and + *two or one and three so we keep going. We can only stop prediction when + *we need exact ambiguity detection when the sets look like + *{@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc...
+ */ + public static resolvesToJustOneViableAlt(altSets: BitSet[]): number { + return PredictionMode.getSingleViableAlt(altSets); + }; + + /** + * Determines if every alternative subset in {@code altSets} contains more + * than one alternative. + * + * @param altSets a collection of alternative subsets + * @returns `true` if every {@link BitSet} in {@code altSets} has + * {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} + */ + public static allSubsetsConflict(altSets: BitSet[]): boolean { + return !PredictionMode.hasNonConflictingAltSet(altSets); + }; + + /** + * Determines if any single alternative subset in {@code altSets} contains + * exactly one alternative. + * + * @param altSets a collection of alternative subsets + * @returns `true` if {@code altSets} contains a {@link BitSet} with + * {@link BitSet//cardinality cardinality} 1, otherwise {@code false} + */ + public static hasNonConflictingAltSet(altSets: BitSet[]): boolean { + for (const alts of altSets) { + if (alts.length === 1) { + return true; + } + } + + return false; + }; + + /** + * Determines if any single alternative subset in {@code altSets} contains + * more than one alternative. + * + * @param altSets a collection of alternative subsets + * @returns `true` if {@code altSets} contains a {@link BitSet} with + * {@link BitSet//cardinality cardinality} > 1, otherwise {@code false} + */ + public static hasConflictingAltSet(altSets: BitSet[]): boolean { + for (const alts of altSets) { + if (alts.length > 1) { + return true; + } + } + + return false; + }; + + /** + * Determines if every alternative subset in {@code altSets} is equivalent. + * + * @param altSets a collection of alternative subsets + * @returns `true` if every member of {@code altSets} is equal to the + * others, otherwise {@code false} + */ + public static allSubsetsEqual(altSets: BitSet[]): boolean { + let first = null; + for (const alts of altSets) { + if (first === null) { + first = alts; + } else if (alts !== first) { + return false; + } + } + + return true; + }; + + /** + * Returns the unique alternative predicted by all alternative subsets in + * {@code altSets}. If no such alternative exists, this method returns + * {@link ATN//INVALID_ALT_NUMBER}. + * + * @param altSets a collection of alternative subsets + */ + public static getUniqueAlt(altSets: BitSet[]): number { + const all = PredictionMode.getAlts(altSets); + if (all.length === 1) { + return all.nextSetBit(0)!; + } else { + return ATN.INVALID_ALT_NUMBER; + } + }; + + /** + * Gets the complete set of represented alternatives for a collection of + * alternative subsets. This method returns the union of each {@link BitSet} + * in {@code altSets}. + * + * @param altSets a collection of alternative subsets + * @returns the set of represented alternatives in {@code altSets} + */ + public static getAlts(altSets: BitSet[]): BitSet { + const all = new BitSet(); + altSets.forEach((alts) => { + all.or(alts); + }); + + return all; + }; + + /** + * This function gets the conflicting alt subsets from a configuration set. + * For each configuration {@code c} in {@code configs}: + * + *+ * map[c] U= c.{@link ATNConfig//alt alt} // map hash/equals uses s and x, not + * alt and not pred + *+ */ + public static getConflictingAltSubsets(configs: ATNConfigSet): BitSet[] { + const configToAlts = new HashMap
+ * map[c.{@link ATNConfig//state state}] U= c.{@link ATNConfig//alt alt} + *+ */ + public static getStateToAltMap(configs: ATNConfigSet): HashMap
- * Since tokens on hidden channels (e.g. whitespace or comments) are not
- * added to the parse trees, they will not appear in the output of this
- * method.
- */
- public getText(): string;
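In practice that means whitespace and comments are absent from the result, since they usually live on a hidden channel. A small illustration (the expr rule and parser instance are hypothetical):

    // getText() concatenates only the tokens that made it into the tree.
    const tree = parser.expr();   // input was "a = 1"
    tree.getText();               // "a=1" - the spaces were on a hidden channel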
-
- /**
- * For rule associated with this parse tree internal node, return
- * the outer alternative number used to match the input. Default
- * implementation does not compute nor store this alt num. Create
- * a subclass of ParserRuleContext with backing field and set
- * option contextSuperClass.
- * to set it.
- */
- public getAltNumber(): number;
-
- /**
- * Set the outer alternative number for this context node. Default
- * implementation does nothing to avoid backing field overhead for
- * trees that don't need it. Create
- * a subclass of ParserRuleContext with backing field and set
- * option contextSuperClass.
- */
- public setAltNumber(altNumber: number): void;
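The contextSuperClass option mentioned in these comments expects exactly such a subclass. A minimal sketch, modeled on the Java runtime's RuleContextWithAltNum (the antlr4ng import names are assumed):

    import { ATN, ParserRuleContext } from "antlr4ng";

    // A context type with a real backing field, so the alt number survives.
    export class RuleContextWithAltNum extends ParserRuleContext {
        public altNumber = ATN.INVALID_ALT_NUMBER;

        public override getAltNumber(): number {
            return this.altNumber;
        }

        public override setAltNumber(altNumber: number): void {
            this.altNumber = altNumber;
        }
    }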
-
- public getChild I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of
- * {@link SemanticContext} within the scope of this outer class. For context dependent predicates, we must pass in a local context so that
- * references such as $arg evaluate properly as _localctx.arg. We only
- * capture context dependent predicates in the context in which we begin
- * prediction, so we passed in the outer context here in case of context
- * dependent predicate evaluation.
- * The evaluation of predicates by this context is short-circuiting, but
- * unordered.
- * The evaluation of predicates by this context is short-circuiting, but
- * unordered. I have scoped the {@link AND}, {@link OR}, and {@link SemanticContext.Predicate} subclasses of
+ * {@link SemanticContext} within the scope of this outer class. For context dependent predicates, we must pass in a local context so that
+ * references such as $arg evaluate properly as _localctx.arg. We only
+ * capture context dependent predicates in the context in which we begin
+ * prediction, so we passed in the outer context here in case of context
+ * dependent predicate evaluation.
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered.
+ * The evaluation of predicates by this context is short-circuiting, but
+ * unordered. This is a one way link. It emanates from a state (usually via a list of
- * transitions) and has a target state. Since we never have to change the ATN transitions once we construct it,
- * we can fix these transitions as specific classes. The DFA transitions
- * on the other hand need to update the labels as it adds transitions to
- * the states. We'll use the term Edge for the DFA to distinguish them from
- * ATN transitions. This is a one way link. It emanates from a state (usually via a list of
+ * transitions) and has a target state. Since we never have to change the ATN transitions once we construct it,
+ * we can fix these transitions as specific classes. The DFA transitions
+ * on the other hand need to update the labels as it adds transitions to
+ * the states. We'll use the term Edge for the DFA to distinguish them from
+ * ATN transitions. The default implementation returns {@code false}. I use a set of ATNConfig objects not simple states. An ATNConfig
- * is both a state (ala normal conversion) and a RuleContext describing
- * the chain of rules (if any) followed to arrive at that state. A DFA state may have multiple references to a particular state,
- * but with different ATN contexts (with same or different alts)
- * meaning that state was reached via a different set of rule invocations. We only use these for non-{@link //requiresFullContext} but
- * conflicting states. That
- * means we know from the context (it's $ or we don't dip into outer
- * context) that it's an ambiguity not a conflict. This list is computed by {@link
- * ParserATNSimulator//predicateDFAState}. Because the number of alternatives and number of ATN configurations are
- * finite, there is a finite number of DFA states that can be processed.
- * This is necessary to show that the algorithm terminates. Cannot test the DFA state numbers here because in
- * {@link ParserATNSimulator//addDFAState} we need to know if any other state
- * exists that has this exact set of ATN configurations. The
- * {@link //stateNumber} is irrelevant. I use a set of ATNConfig objects not simple states. An ATNConfig
+ * is both a state (ala normal conversion) and a RuleContext describing
+ * the chain of rules (if any) followed to arrive at that state. A DFA state may have multiple references to a particular state,
+ * but with different ATN contexts (with same or different alts)
+ * meaning that state was reached via a different set of rule invocations. We only use these for non-{@link #requiresFullContext} but conflicting states. That
+ * means we know from the context (it's $ or we don't dip into outer
+ * context) that it's an ambiguity not a conflict. This list is computed by {@link ParserATNSimulator#predicateDFAState}. Because the number of alternatives and number of ATN configurations are
+ * finite, there is a finite number of DFA states that can be processed.
+ * This is necessary to show that the algorithm terminates. Cannot test the DFA state numbers here because in
+ * {@link ParserATNSimulator#addDFAState} we need to know if any other state
+ * exists that has this exact set of ATN configurations. The
+ * {@link #stateNumber} is irrelevant.
- *
- */
- evalPrecedence(parser, outerContext) {
- return this;
- }
-
- static andContext(a, b) {
- if (a === null || a === SemanticContext.NONE) {
- return b;
- }
- if (b === null || b === SemanticContext.NONE) {
- return a;
- }
- const result = new AND(a, b);
- if (result.opnds.length === 1) {
- return result.opnds[0];
- } else {
- return result;
- }
- }
-
- static orContext(a, b) {
- if (a === null) {
- return b;
- }
- if (b === null) {
- return a;
- }
- if (a === SemanticContext.NONE || b === SemanticContext.NONE) {
- return SemanticContext.NONE;
- }
- const result = new OR(a, b);
- if (result.opnds.length === 1) {
- return result.opnds[0];
- } else {
- return result;
- }
- }
-}
-
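The two static combinators deleted above implement the usual ANTLR predicate-reduction rules: `NONE` (the empty context) is neutral for AND but absorbing for OR, and a combined context that collapses to a single operand is returned directly. A minimal usage sketch, assuming `SemanticContext` and its static helpers are exported by this package (they are internal API, so treat the import as an assumption):

```ts
import { SemanticContext } from "antlr4ng"; // assumed export

// Some already constructed predicate context (stand-in for this sketch).
declare const p: SemanticContext;

// NONE is the neutral element for AND: "no predicate" constrains nothing.
console.assert(SemanticContext.andContext(SemanticContext.NONE, p) === p);

// NONE is the absorbing element for OR: "anything or true" is true.
console.assert(SemanticContext.orContext(p, SemanticContext.NONE) === SemanticContext.NONE);

// Duplicate operands are removed via the HashSet, so AND(p, p) collapses
// to the single remaining operand instead of producing a new AND node.
console.assert(SemanticContext.andContext(p, p) === p);
```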
-class AND extends SemanticContext {
- /**
- * A semantic context which is true whenever none of the contained contexts
- * is false
- */
- constructor(a, b) {
- super();
- const operands = new HashSet();
- if (a instanceof AND) {
- a.opnds.map(function (o) {
- operands.add(o);
- });
- } else {
- operands.add(a);
- }
- if (b instanceof AND) {
- b.opnds.map(function (o) {
- operands.add(o);
- });
- } else {
- operands.add(b);
- }
- const precedencePredicates = filterPrecedencePredicates(operands);
- if (precedencePredicates.length > 0) {
- // interested in the transition with the lowest precedence
- let reduced = null;
- precedencePredicates.map(function (p) {
- if (reduced === null || p.precedence < reduced.precedence) {
- reduced = p;
- }
- });
- operands.add(reduced);
- }
- this.opnds = Array.from(operands.values());
- }
-
- equals(other) {
- if (this === other) {
- return true;
- } else if (!(other instanceof AND)) {
- return false;
- } else {
- return equalArrays(this.opnds, other.opnds);
- }
- }
-
- updateHashCode(hash) {
- hash.update(this.opnds, "AND");
- }
-
- /**
- * {@inheritDoc}
- *
- * The evaluation of predicates by this context is short-circuiting, but
- * unordered.
- */
+ *
+ */
+ public evalPrecedence(parser: Recognizer<ATNSimulator>, outerContext: ParserRuleContext): SemanticContext | null {
+ return this;
+ }
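For callers, the interesting part of `evalPrecedence` is its four-way result, as documented in the comment above. A small sketch of how prediction code can interpret it; `describeResult` is a hypothetical helper for illustration, not part of the runtime:

```ts
import { SemanticContext } from "antlr4ng"; // assumed export

// Hypothetical helper: maps the four documented outcomes of evalPrecedence
// to the action taken for the corresponding ATN configuration.
function describeResult(before: SemanticContext, after: SemanticContext | null): string {
    if (after === null) {
        return "predicate is false at this precedence: drop the configuration";
    }
    if (after === SemanticContext.NONE) {
        return "predicate reduced to true: keep it with no runtime check";
    }
    if (after === before) {
        return "context unchanged: keep the original predicate";
    }
    return "partially reduced: keep the simplified predicate";
}
```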
diff --git a/src/dfa/DFA.js b/src/dfa/DFA.js
deleted file mode 100644
--- a/src/dfa/DFA.js
+++ /dev/null
- /**
- * Sets whether this is a precedence DFA.
- *
- * @param precedenceDfa {@code true} if this is a precedence DFA; otherwise,
- * {@code false}
- */
- setPrecedenceDfa(precedenceDfa) {
- if (this.precedenceDfa !== precedenceDfa) {
- this._states = new HashSet();
- if (precedenceDfa) {
- const precedenceState = new DFAState(null, new ATNConfigSet());
- precedenceState.edges = [];
- precedenceState.isAcceptState = false;
- precedenceState.requiresFullContext = false;
- this.s0 = precedenceState;
- } else {
- this.s0 = null;
- }
- this.precedenceDfa = precedenceDfa;
- }
- }
-
- /**
- * Return a list of all states in this DFA, ordered by state number.
- */
- sortedStates() {
- const list = this._states.values();
- return list.sort(function (a, b) {
- return a.stateNumber - b.stateNumber;
- });
- }
-
- toString(literalNames, symbolicNames) {
- literalNames = literalNames || null;
- symbolicNames = symbolicNames || null;
- if (this.s0 === null) {
- return "";
- }
- const serializer = new DFASerializer(this, literalNames, symbolicNames);
- return serializer.toString();
- }
-
- toLexerString() {
- if (this.s0 === null) {
- return "";
- }
- const serializer = new LexerDFASerializer(this);
- return serializer.toString();
- }
-
- get states() {
- return this._states;
- }
-}
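The synthetic `s0` state created in the deleted `setPrecedenceDfa` above is how a precedence DFA keeps one start state per precedence level: the level indexes into `s0.edges`. A sketch of that bookkeeping, following the Java runtime's behavior; the class and method names here are illustrative only:

```ts
// Minimal stand-in for DFAState, just for this sketch.
interface DFAStateLike { stateNumber: number; }

class PrecedenceStartStates {
    // Plays the role of s0.edges in the runtime: index = precedence level.
    private readonly edges: Array<DFAStateLike | undefined> = [];

    public get(precedence: number): DFAStateLike | null {
        if (precedence < 0 || precedence >= this.edges.length) {
            return null; // no start state computed for this level yet
        }
        return this.edges[precedence] ?? null;
    }

    public set(precedence: number, state: DFAStateLike): void {
        if (precedence < 0) {
            return; // invalid levels are ignored, as in the Java runtime
        }
        this.edges[precedence] = state; // sparse writes are fine here
    }
}
```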
diff --git a/src/dfa/DFA.ts b/src/dfa/DFA.ts
new file mode 100644
index 0000000..fbc37bf
--- /dev/null
+++ b/src/dfa/DFA.ts
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+ * Use of this file is governed by the BSD 3-clause license that
+ * can be found in the LICENSE.txt file in the project root.
+ */
+
+import { DFASerializer } from "./DFASerializer.js";
+import { DFAState } from "./DFAState.js";
+import { LexerDFASerializer } from "./LexerDFASerializer.js";
+import { Vocabulary } from "../Vocabulary.js";
+import { DecisionState } from "../atn/DecisionState.js";
+import { StarLoopEntryState } from "../atn/StarLoopEntryState.js";
+import { HashSet } from "../misc/HashSet.js";
+
+export class DFA {
+ /**
+ * A set of all DFA states. Use {@link Map} so we can get old state back
+ * ({@link Set} only allows you to see if it's there).
+ */
+ public readonly states = new HashSet<DFAState>();
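The comment on `states` is the key design point: when the simulator builds a DFA state whose ATN configuration set equals an existing one, it must get the stored instance back so all edges accumulate on a single object, while a plain `Set` can only answer membership. A self-contained sketch of that unification pattern; `UnifyingSet` and `getOrAdd` are illustrative names, not this package's `HashSet` API:

```ts
class UnifyingSet<T> {
    private readonly byKey = new Map<string, T>();

    public constructor(private readonly keyOf: (value: T) => string) { }

    // Returns the previously stored equal element if present; otherwise
    // stores the new element and returns it.
    public getOrAdd(value: T): T {
        const key = this.keyOf(value);
        const existing = this.byKey.get(key);
        if (existing !== undefined) {
            return existing;
        }
        this.byKey.set(key, value);
        return value;
    }
}

// addDFAState-style usage: two states with equal configuration sets unify.
interface StateLike { configsKey: string; stateNumber: number; }

const states = new UnifyingSet<StateLike>((s) => s.configsKey);
const first = states.getOrAdd({ configsKey: "cfg", stateNumber: 0 });
const second = states.getOrAdd({ configsKey: "cfg", stateNumber: 1 });
console.assert(first === second); // the duplicate is discarded, edges stay on `first`
```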