Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Operations#union merge accept states that have no outgoing transition. #14207

Merged
merged 3 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ public static Automaton union(Collection<Automaton> l) {

result.finishState();

return removeDeadStates(result);
return mergeAcceptStatesWithNoTransition(removeDeadStates(result));
}

// Simple custom ArrayList<Transition>
Expand Down Expand Up @@ -1052,6 +1052,77 @@ public static Automaton removeDeadStates(Automaton a) {
return result;
}

/** Merge all accept states that don't have outgoing transitions to a single shared state. */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea, couldn't we potentially use this for other operations such as reverse() which might cause the same inefficiency as union() today?

Can we hit it with a simple random test for good feelings about it?

for (int i = 0; i < N; i++) {
  Automaton expected = AutomatonTestUtil.randomAutomata(random);
  Automaton actual = mergeAcceptStates(expected);
  assertSameLanguage(expected, actual);
}

static Automaton mergeAcceptStatesWithNoTransition(Automaton a) {
int numStates = a.getNumStates();

int numAcceptStatesWithNoTransition = 0;
int[] acceptStatesWithNoTransition = new int[0];

BitSet acceptStates = a.getAcceptStates();
for (int i = 0; i < numStates; ++i) {
if (acceptStates.get(i) && a.getNumTransitions(i) == 0) {
acceptStatesWithNoTransition =
ArrayUtil.grow(acceptStatesWithNoTransition, 1 + numAcceptStatesWithNoTransition);
acceptStatesWithNoTransition[numAcceptStatesWithNoTransition++] = i;
}
}

if (numAcceptStatesWithNoTransition <= 1) {
// No states to merge
return a;
}

// Shrink for simplicity.
acceptStatesWithNoTransition =
ArrayUtil.copyOfSubArray(acceptStatesWithNoTransition, 0, numAcceptStatesWithNoTransition);

// Now copy states, preserving accept states.
Automaton result = new Automaton();
for (int s = 0; s < numStates; s++) {
int remappedS = remap(s, acceptStatesWithNoTransition);
while (result.getNumStates() <= remappedS) {
result.createState();
}
if (acceptStates.get(s)) {
result.setAccept(remappedS, true);
}
}

// Now copy transitions, making sure to remap states.
Transition t = new Transition();
for (int s = 0; s < numStates; ++s) {
int remappedSource = remap(s, acceptStatesWithNoTransition);
int numTransitions = a.initTransition(s, t);
for (int j = 0; j < numTransitions; j++) {
a.getNextTransition(t);
int remappedDest = remap(t.dest, acceptStatesWithNoTransition);
result.addTransition(remappedSource, remappedDest, t.min, t.max);
}
}

result.finishState();
return result;
}

private static int remap(int s, int[] combinedStates) {
int idx = Arrays.binarySearch(combinedStates, s);
if (idx >= 0) {
// This state is part of the states that get combined, remap to the first one.
return combinedStates[0];
} else {
idx = -1 - idx;
if (idx <= 1) {
// There is either no combined state before the current state, or only the first one, which
// we're preserving: no renumbering needed.
return s;
} else {
// Subtract the number of states that get combined into the first combined state.
return s - (idx - 1);
}
}
}

/**
* Returns the longest string that is a prefix of all accepted strings and visits each state at
* most once. The automaton must not have dead states. If this automaton has already been
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,45 @@ public void testRepeat() {
Operations.determinize(Operations.repeat(aOrAb), Integer.MAX_VALUE)));
}

public void testMergeAcceptStatesWithNoTransition() {
Automaton emptyLanguage = Automata.makeEmpty();
assertSame(emptyLanguage, Operations.mergeAcceptStatesWithNoTransition(emptyLanguage));

Automaton a = Automata.makeString("a");
assertSame(a, Operations.mergeAcceptStatesWithNoTransition(a));

// All accept states get combined
Automaton aOrC = new Automaton();
aOrC.createState();
aOrC.createState();
aOrC.createState();
aOrC.addTransition(0, 1, 'a');
aOrC.setAccept(1, true);
aOrC.addTransition(0, 2, 'c');
aOrC.setAccept(2, true);
Automaton aOrCSingleAcceptState = Operations.mergeAcceptStatesWithNoTransition(aOrC);
assertEquals(1, aOrCSingleAcceptState.getAcceptStates().cardinality());
assertTrue(AutomatonTestUtil.sameLanguage(aOrC, aOrCSingleAcceptState));

// Two accept states get combined, but not the 3rd one since it has an outgoing transition
Automaton aOrCOrXStar = new Automaton();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.createState();
aOrCOrXStar.addTransition(0, 1, 'a');
aOrCOrXStar.setAccept(1, true);
aOrCOrXStar.addTransition(0, 2, 'c');
aOrCOrXStar.setAccept(2, true);
aOrCOrXStar.addTransition(0, 3, 'x');
aOrCOrXStar.addTransition(3, 3, 'x');
aOrCOrXStar.setAccept(3, true);
Automaton aOrCOrXStarSingleAcceptState =
Operations.mergeAcceptStatesWithNoTransition(aOrCOrXStar);
assertEquals(2, aOrCOrXStarSingleAcceptState.getAcceptStates().cardinality());
assertTrue(AutomatonTestUtil.sameLanguage(aOrCOrXStar, aOrCOrXStarSingleAcceptState));
}

public void testDuelRepeat() {
final int iters = atLeast(1_000);
for (int iter = 0; iter < iters; ++iter) {
Expand Down