From 78cfeb6598695a4dd914c74af7167dac1c8dc32b Mon Sep 17 00:00:00 2001 From: Mike Lischke Date: Sat, 2 Mar 2024 14:48:48 +0100 Subject: [PATCH] Converted ATNState.stateType back to an instance member. Querying the static member if you only have an instance leads to ugly code. Signed-off-by: Mike Lischke --- .vscode/launch.json | 4 +++ src/DefaultErrorStrategy.ts | 2 +- src/Parser.ts | 7 ++++- src/ParserInterpreter.ts | 6 ++-- src/atn/ATNConfigSet.ts | 54 ++++++++++++++------------------- src/atn/ATNSerializer.ts | 8 ++--- src/atn/BasicBlockStartState.ts | 2 +- src/atn/BasicState.ts | 2 +- src/atn/BlockEndState.ts | 2 +- src/atn/LL1Analyzer.ts | 2 +- src/atn/LexerATNSimulator.ts | 2 +- src/atn/LoopEndState.ts | 2 +- src/atn/OrderedATNConfigSet.ts | 2 +- src/atn/ParserATNSimulator.ts | 16 +++++----- src/atn/PlusBlockStartState.ts | 2 +- src/atn/PlusLoopbackState.ts | 2 +- src/atn/RuleStartState.ts | 2 +- src/atn/RuleStopState.ts | 2 +- src/atn/StarBlockStartState.ts | 2 +- src/atn/StarLoopEntryState.ts | 2 +- src/atn/StarLoopbackState.ts | 2 +- src/atn/TokensStartState.ts | 2 +- src/dfa/DFA.ts | 33 +++++++++++++++----- 23 files changed, 88 insertions(+), 72 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index e2fe24e..e4ea0e8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -35,11 +35,15 @@ ], "runtimeArgs": [ "--no-warnings=ExperimentalWarning", + "--enable-source-maps", "--loader", "ts-node/esm", "tests/benchmarks/run-benchmarks.ts", ], "sourceMaps": true, + "outFiles": [ + "dist/*.?js" + ], "resolveSourceMapLocations": [ "${workspaceFolder}/**", "!**/node_modules/**" diff --git a/src/DefaultErrorStrategy.ts b/src/DefaultErrorStrategy.ts index b5d76a0..26e53ce 100644 --- a/src/DefaultErrorStrategy.ts +++ b/src/DefaultErrorStrategy.ts @@ -221,7 +221,7 @@ export class DefaultErrorStrategy { return; } - switch ((s.constructor as typeof ATNState).stateType) { + switch (s.stateType) { case ATNState.BLOCK_START: case 
ATNState.STAR_BLOCK_START: case ATNState.PLUS_BLOCK_START: diff --git a/src/Parser.ts b/src/Parser.ts index 461361f..c40d80e 100644 --- a/src/Parser.ts +++ b/src/Parser.ts @@ -350,7 +350,12 @@ export abstract class Parser extends Recognizer { return this.#bypassAltsAtnCache; } - const deserializationOptions = { readOnly: false, verifyATN: true, generateRuleBypassTransitions: true }; + const deserializationOptions = { + readOnly: false, + verifyATN: true, + generateRuleBypassTransitions: true, + optimize: true, + }; this.#bypassAltsAtnCache = new ATNDeserializer(deserializationOptions).deserialize(serializedAtn); return this.#bypassAltsAtnCache; diff --git a/src/ParserInterpreter.ts b/src/ParserInterpreter.ts index d8275ea..4192500 100644 --- a/src/ParserInterpreter.ts +++ b/src/ParserInterpreter.ts @@ -116,7 +116,7 @@ export class ParserInterpreter extends Parser { while (true) { const p = this.atnState; - switch ((p.constructor as typeof ATNState).stateType) { + switch (p.stateType) { case ATNState.RULE_STOP: // pop; return from rule if (this.context?.isEmpty) { @@ -180,8 +180,8 @@ export class ParserInterpreter extends Parser { const transition = p.transitions[predictedAlt - 1]; switch (transition.transitionType) { case Transition.EPSILON: - if (this.#pushRecursionContextStates.get(p.stateNumber) && - !((transition.target.constructor as typeof ATNState).stateType === ATNState.LOOP_END)) { + if (this.#pushRecursionContextStates.get(p.stateNumber) + && !(transition.target.stateType === ATNState.LOOP_END)) { // We are at the start of a left recursive rule's (...)* loop // and we're not taking the exit branch of loop. 
const parentContext = this.#parentContextStack[this.#parentContextStack.length - 1]; diff --git a/src/atn/ATNConfigSet.ts b/src/atn/ATNConfigSet.ts index a5623a2..d340d77 100644 --- a/src/atn/ATNConfigSet.ts +++ b/src/atn/ATNConfigSet.ts @@ -52,20 +52,16 @@ class KeyTypeEqualityComparer implements EqualityComparator { export class ATNConfigSet { /** * The reason that we need this is because we don't want the hash map to use - * the standard hash code and equals. We need all configurations with the - * same - * `(s,i,_,semctx)` to be equal. Unfortunately, this key effectively - * doubles - * the number of objects associated with ATNConfigs. The other solution is - * to + * the standard hash code and equals. We need all configurations with the same + * `(s,i,_,semctx)` to be equal. Unfortunately, this key effectively doubles + * the number of objects associated with ATNConfigs. The other solution is to * use a hash table that lets us specify the equals/hashCode operation. * All configs but hashed by (s, i, _, pi) not including context. Wiped out * when we go readonly as this set becomes a DFA state */ - public configLookup: HashSet | null = - new HashSet(KeyTypeEqualityComparer.instance); + public configLookup: HashSet | null = new HashSet(KeyTypeEqualityComparer.instance); - // Track the elements as they are added to the set; supports get(i). + /** Track the elements as they are added to the set; supports get(i). */ public configs: ATNConfig[] = []; public uniqueAlt = 0; @@ -103,20 +99,20 @@ export class ATNConfigSet { #cachedHashCode = -1; - public constructor(fullCtxOrOldSet?: boolean | ATNConfigSet) { - if (fullCtxOrOldSet !== undefined) { - if (typeof fullCtxOrOldSet === "boolean") { - this.fullCtx = fullCtxOrOldSet ?? 
true; - } else { - const old = fullCtxOrOldSet; - - this.addAll(old.configs); - this.uniqueAlt = old.uniqueAlt; - this.conflictingAlts = old.conflictingAlts; - this.hasSemanticContext = old.hasSemanticContext; - this.dipsIntoOuterContext = old.dipsIntoOuterContext; - } - } + public constructor(fullCtx: boolean = true) { + this.fullCtx = fullCtx; + } + + public static duplicate(set: ATNConfigSet): ATNConfigSet { + const result = new ATNConfigSet(true); + + result.addAll(set.configs); + result.uniqueAlt = set.uniqueAlt; + result.conflictingAlts = set.conflictingAlts; + result.hasSemanticContext = set.hasSemanticContext; + result.dipsIntoOuterContext = set.dipsIntoOuterContext; + + return result; } public [Symbol.iterator](): IterableIterator { @@ -137,7 +133,7 @@ export class ATNConfigSet { throw new Error("This set is readonly"); } - if (!this.firstStopState && (config.state.constructor as typeof ATNState).stateType === ATNState.RULE_STOP) { + if (!this.firstStopState && config.state.stateType === ATNState.RULE_STOP) { this.firstStopState = config; } @@ -189,14 +185,8 @@ export class ATNConfigSet { } public getPredicates(): SemanticContext[] { - const preds = []; - for (const config of this.configs) { - if (config.semanticContext !== SemanticContext.NONE) { - preds.push(config.semanticContext); - } - } - - return preds; + return this.configs.filter((config) => { return config.semanticContext !== SemanticContext.NONE; }) + .map((config) => { return config.semanticContext; }); } public getStates(): HashSet { diff --git a/src/atn/ATNSerializer.ts b/src/atn/ATNSerializer.ts index 74c8ec9..2526672 100644 --- a/src/atn/ATNSerializer.ts +++ b/src/atn/ATNSerializer.ts @@ -225,7 +225,7 @@ export class ATNSerializer { continue; } - const stateType = (s.constructor as typeof ATNState).stateType; + const stateType = s.stateType; if (s instanceof DecisionState && s.nonGreedy) { this.nonGreedyStates.push(s.stateNumber); } @@ -238,7 +238,7 @@ export class ATNSerializer { 
this.data.push(s.ruleIndex); - if ((s.constructor as typeof ATNState).stateType === ATNState.LOOP_END) { + if (s.stateType === ATNState.LOOP_END) { this.data.push((s as LoopEndState).loopBackState!.stateNumber); } else { if (s instanceof BlockStartState) { @@ -246,7 +246,7 @@ export class ATNSerializer { } } - if ((s.constructor as typeof ATNState).stateType !== ATNState.RULE_STOP) { + if (s.stateType !== ATNState.RULE_STOP) { // the deserializer can trivially derive these edges, so there's no need to serialize them edgeCount += s.transitions.length; } @@ -273,7 +273,7 @@ export class ATNSerializer { continue; } - if ((s.constructor as typeof ATNState).stateType === ATNState.RULE_STOP) { + if (s.stateType === ATNState.RULE_STOP) { continue; } diff --git a/src/atn/BasicBlockStartState.ts b/src/atn/BasicBlockStartState.ts index 785c232..f549e18 100644 --- a/src/atn/BasicBlockStartState.ts +++ b/src/atn/BasicBlockStartState.ts @@ -8,5 +8,5 @@ import { ATNState } from "./ATNState.js"; import { BlockStartState } from "./BlockStartState.js"; export class BasicBlockStartState extends BlockStartState { - public static override readonly stateType = ATNState.BLOCK_START; + public override get stateType(): number { return ATNState.BLOCK_START; } } diff --git a/src/atn/BasicState.ts b/src/atn/BasicState.ts index 464fd58..91b7161 100644 --- a/src/atn/BasicState.ts +++ b/src/atn/BasicState.ts @@ -7,5 +7,5 @@ import { ATNState } from "./ATNState.js"; export class BasicState extends ATNState { - public static override readonly stateType = ATNState.BASIC; + public override get stateType(): number { return ATNState.BASIC; } } diff --git a/src/atn/BlockEndState.ts b/src/atn/BlockEndState.ts index a3419d9..25b627a 100644 --- a/src/atn/BlockEndState.ts +++ b/src/atn/BlockEndState.ts @@ -11,7 +11,7 @@ import { BlockStartState } from "./BlockStartState.js"; * Terminal node of a simple `(a|b|c)` block. 
*/ export class BlockEndState extends ATNState { - public static override readonly stateType = ATNState.BLOCK_END; + public override get stateType(): number { return ATNState.BLOCK_END; } public startState?: BlockStartState; } diff --git a/src/atn/LL1Analyzer.ts b/src/atn/LL1Analyzer.ts index 3488610..a147f06 100644 --- a/src/atn/LL1Analyzer.ts +++ b/src/atn/LL1Analyzer.ts @@ -153,7 +153,7 @@ export class LL1Analyzer { return; } } - if ((s.constructor as typeof ATNState).stateType === ATNState.RULE_STOP) { + if (s.stateType === ATNState.RULE_STOP) { if (!ctx) { look.addOne(Token.EPSILON); diff --git a/src/atn/LexerATNSimulator.ts b/src/atn/LexerATNSimulator.ts index ef1f9aa..5a15e01 100644 --- a/src/atn/LexerATNSimulator.ts +++ b/src/atn/LexerATNSimulator.ts @@ -388,7 +388,7 @@ export class LexerATNSimulator extends ATNSimulator { private closure(input: CharStream, config: LexerATNConfig, configs: ATNConfigSet, currentAltReachedAcceptState: boolean, speculative: boolean, treatEofAsEpsilon: boolean): boolean { let cfg = null; - if ((config.state.constructor as typeof ATNState).stateType === ATNState.RULE_STOP) { + if (config.state.stateType === ATNState.RULE_STOP) { if (!config.context || config.context.hasEmptyPath()) { if (!config.context || config.context.isEmpty()) { configs.add(config); diff --git a/src/atn/LoopEndState.ts b/src/atn/LoopEndState.ts index bd82bc6..c46e34b 100644 --- a/src/atn/LoopEndState.ts +++ b/src/atn/LoopEndState.ts @@ -10,7 +10,7 @@ import { ATNState } from "./ATNState.js"; * Mark the end of a * or + loop */ export class LoopEndState extends ATNState { - public static override readonly stateType = ATNState.LOOP_END; + public override get stateType(): number { return ATNState.LOOP_END; } public loopBackState?: ATNState; } diff --git a/src/atn/OrderedATNConfigSet.ts b/src/atn/OrderedATNConfigSet.ts index d71d286..1c39d6d 100644 --- a/src/atn/OrderedATNConfigSet.ts +++ b/src/atn/OrderedATNConfigSet.ts @@ -9,7 +9,7 @@ import { OrderedHashSet } 
from "../misc/OrderedHashSet.js"; export class OrderedATNConfigSet extends ATNConfigSet { public constructor() { - super(); + super(true); this.configLookup = new OrderedHashSet(); } } diff --git a/src/atn/ParserATNSimulator.ts b/src/atn/ParserATNSimulator.ts index ff8e89b..a99fdee 100644 --- a/src/atn/ParserATNSimulator.ts +++ b/src/atn/ParserATNSimulator.ts @@ -335,7 +335,7 @@ export class ParserATNSimulator extends ATNSimulator { } const fullCtx = false; - let s0_closure = this.computeStartState(dfa.atnStartState!, ParserRuleContext.empty, fullCtx); + let startState = this.computeStartState(dfa.atnStartState!, ParserRuleContext.empty, fullCtx); if (dfa.isPrecedenceDfa) { // If this is a precedence DFA, we use applyPrecedenceFilter @@ -343,11 +343,11 @@ export class ParserATNSimulator extends ATNSimulator { // state. We then use DFA.setPrecedenceStartState to set the // appropriate start state for the precedence level rather // than simply setting DFA.s0. - s0_closure = this.applyPrecedenceFilter(s0_closure); - s0 = this.addDFAState(dfa, DFAState.fromConfigs(s0_closure)); + startState = this.applyPrecedenceFilter(startState); + s0 = this.addDFAState(dfa, DFAState.fromConfigs(startState)); dfa.setPrecedenceStartState(this.parser.getPrecedence(), s0); } else { - s0 = this.addDFAState(dfa, DFAState.fromConfigs(s0_closure)); + s0 = this.addDFAState(dfa, DFAState.fromConfigs(startState)); dfa.s0 = s0; } } @@ -1274,7 +1274,7 @@ export class ParserATNSimulator extends ATNSimulator { const c = this.getEpsilonTarget(config, t, continueCollecting, depth === 0, fullCtx, treatEofAsEpsilon); if (c) { let newDepth = depth; - if ((config.state.constructor as typeof ATNState).stateType === ATNState.RULE_STOP) { + if (config.state.stateType === ATNState.RULE_STOP) { // target fell off end of rule; mark resulting c as having dipped into outer context // We can't get here if incoming config was rule stop and we had context // track how far we dip into outer context. 
Might @@ -1322,7 +1322,7 @@ export class ParserATNSimulator extends ATNSimulator { // the context has an empty stack case. If so, it would mean // global FOLLOW so we can't perform optimization // Are we the special loop entry/exit state? or SLL wildcard - if ((p.constructor as typeof ATNState).stateType !== ATNState.STAR_LOOP_ENTRY || !config.context) { + if (p.stateType !== ATNState.STAR_LOOP_ENTRY || !config.context) { return false; } @@ -1356,7 +1356,7 @@ export class ParserATNSimulator extends ATNSimulator { // Look for prefix op case like 'not expr', (' type ')' expr const returnStateTarget = returnState.transitions[0].target; - if ((returnState.constructor as typeof ATNState).stateType === ATNState.BLOCK_END + if (returnState.stateType === ATNState.BLOCK_END && returnStateTarget === p) { continue; } @@ -1376,7 +1376,7 @@ export class ParserATNSimulator extends ATNSimulator { // Look for complex prefix 'between expr and expr' case where 2nd expr's // return state points at block end state of (...)* internal block - if ((returnStateTarget.constructor as typeof ATNState).stateType === ATNState.BLOCK_END + if (returnStateTarget.stateType === ATNState.BLOCK_END && returnStateTarget.transitions.length === 1 && returnStateTarget.transitions[0].isEpsilon && returnStateTarget.transitions[0].target === p) { continue; diff --git a/src/atn/PlusBlockStartState.ts b/src/atn/PlusBlockStartState.ts index 4df43ff..693fd3c 100644 --- a/src/atn/PlusBlockStartState.ts +++ b/src/atn/PlusBlockStartState.ts @@ -15,7 +15,7 @@ import { PlusLoopbackState } from "./PlusLoopbackState.js"; * real decision-making note for `A+` */ export class PlusBlockStartState extends BlockStartState { - public static override readonly stateType = ATNState.PLUS_BLOCK_START; + public override get stateType(): number { return ATNState.PLUS_BLOCK_START; } public loopBackState: PlusLoopbackState; } diff --git a/src/atn/PlusLoopbackState.ts b/src/atn/PlusLoopbackState.ts index 5a7a64a..58fc2a0 100644 --- 
a/src/atn/PlusLoopbackState.ts +++ b/src/atn/PlusLoopbackState.ts @@ -12,5 +12,5 @@ import { DecisionState } from "./DecisionState.js"; * one to the loop back to start of the block and one to exit. */ export class PlusLoopbackState extends DecisionState { - public static override readonly stateType = ATNState.PLUS_LOOP_BACK; + public override get stateType(): number { return ATNState.PLUS_LOOP_BACK; } } diff --git a/src/atn/RuleStartState.ts b/src/atn/RuleStartState.ts index ec07b62..d3143c5 100644 --- a/src/atn/RuleStartState.ts +++ b/src/atn/RuleStartState.ts @@ -8,7 +8,7 @@ import { ATNState } from "./ATNState.js"; import { RuleStopState } from "./RuleStopState.js"; export class RuleStartState extends ATNState { - public static override readonly stateType = ATNState.RULE_START; + public override get stateType(): number { return ATNState.RULE_START; } public stopState?: RuleStopState; public isLeftRecursiveRule: boolean = false; diff --git a/src/atn/RuleStopState.ts b/src/atn/RuleStopState.ts index e107e74..8424de5 100644 --- a/src/atn/RuleStopState.ts +++ b/src/atn/RuleStopState.ts @@ -13,6 +13,6 @@ import { ATNState } from "./ATNState.js"; * error handling */ export class RuleStopState extends ATNState { - public static override readonly stateType = ATNState.RULE_STOP; + public override get stateType(): number { return ATNState.RULE_STOP; } } diff --git a/src/atn/StarBlockStartState.ts b/src/atn/StarBlockStartState.ts index 1a28592..4d6ec3f 100644 --- a/src/atn/StarBlockStartState.ts +++ b/src/atn/StarBlockStartState.ts @@ -11,5 +11,5 @@ import { BlockStartState } from "./BlockStartState.js"; * The block that begins a closure loop */ export class StarBlockStartState extends BlockStartState { - public static override readonly stateType = ATNState.STAR_BLOCK_START; + public override get stateType(): number { return ATNState.STAR_BLOCK_START; } } diff --git a/src/atn/StarLoopEntryState.ts b/src/atn/StarLoopEntryState.ts index 25fa330..63125b2 100644 --- 
a/src/atn/StarLoopEntryState.ts +++ b/src/atn/StarLoopEntryState.ts @@ -9,7 +9,7 @@ import { DecisionState } from "./DecisionState.js"; import { StarLoopbackState } from "./StarLoopbackState.js"; export class StarLoopEntryState extends DecisionState { - public static override readonly stateType = ATNState.STAR_LOOP_ENTRY; + public override get stateType(): number { return ATNState.STAR_LOOP_ENTRY; } // This is always set during ATN deserialization public loopBackState!: StarLoopbackState; diff --git a/src/atn/StarLoopbackState.ts b/src/atn/StarLoopbackState.ts index ec1e080..905876b 100644 --- a/src/atn/StarLoopbackState.ts +++ b/src/atn/StarLoopbackState.ts @@ -7,5 +7,5 @@ import { ATNState } from "./ATNState.js"; export class StarLoopbackState extends ATNState { - public static override readonly stateType = ATNState.STAR_LOOP_BACK; + public override get stateType(): number { return ATNState.STAR_LOOP_BACK; } } diff --git a/src/atn/TokensStartState.ts b/src/atn/TokensStartState.ts index cc80958..e59e9f5 100644 --- a/src/atn/TokensStartState.ts +++ b/src/atn/TokensStartState.ts @@ -11,5 +11,5 @@ import { DecisionState } from "./DecisionState.js"; * The Tokens rule start state linking to each lexer rule start state */ export class TokensStartState extends DecisionState { - public static override readonly stateType = ATNState.TOKEN_START; + public override get stateType(): number { return ATNState.TOKEN_START; } } diff --git a/src/dfa/DFA.ts b/src/dfa/DFA.ts index db14006..7844c99 100644 --- a/src/dfa/DFA.ts +++ b/src/dfa/DFA.ts @@ -12,6 +12,14 @@ import { DecisionState } from "../atn/DecisionState.js"; import { StarLoopEntryState } from "../atn/StarLoopEntryState.js"; import type { ATNConfigSet } from "../index.js"; +/** + * Debugging helper to figure out if an ATNConfigSet collision exists, in case of unexpected parse errors. + * The DFA class only uses hash codes to find configs (for performance reasons). + * All tests so far indicate this is a valid optimization. 
By setting this to true, an additional check is + * done to ensure we do not have any hash collisions. + */ +const checkCollisions = false; + export class DFA { public s0?: DFAState; @@ -60,11 +68,9 @@ export class DFA { * Get the start state for a specific precedence value. * * @param precedence The current precedence. - @returns The start state corresponding to the specified precedence, or - * `null` if no start state exists for the specified precedence. * - * @throws IllegalStateException if this is not a precedence DFA. - * @see #isPrecedenceDfa + * @returns The start state corresponding to the specified precedence, or `undefined` if no start state exists for + * the specified precedence. */ public readonly getPrecedenceStartState = (precedence: number): DFAState | undefined => { if (!this.isPrecedenceDfa) { @@ -72,7 +78,7 @@ export class DFA { } // s0.edges is never null for a precedence DFA - if (!this.s0 || !this.s0.edges || precedence < 0 || precedence >= this.s0.edges.length) { + if (!this.s0 || precedence < 0 || precedence >= this.s0.edges.length) { return undefined; } @@ -115,13 +121,24 @@ export class DFA { return this.#states.get(state.configs.hashCode()) ?? null; } - public getStateForConfigs(configs: ATNConfigSet): DFAState | null { - return this.#states.get(configs.hashCode()) ?? null; + public getStateForConfigs(configs: ATNConfigSet): DFAState | undefined { + const result = this.#states.get(configs.hashCode()); + if (result && result.configs.equals(configs)) { + return result; + } + + return undefined; } public addState(state: DFAState): void { const hash = state.configs.hashCode(); - if (this.#states.has(hash)) { + const existing = this.#states.get(hash); + if (existing) { + if (checkCollisions && !existing.configs.equals(state.configs)) { + // Got an unhandled collision here. + throw new Error("Internal error: DFA state collision for " + state.configs.hashCode()); + } + return; }