Skip to content

Commit

Permalink
fix broken tests
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Feb 26, 2024
1 parent 25efa99 commit ff21a76
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ fun Array<DoubleArray>.toDoubleMatrix() = DoubleMatrix(size, this[0].size) { i,

// Kronecker delta: yields 1.0 when the two indices coincide and 0.0 otherwise.
fun kroneckerDelta(i: Int, j: Int): Double = when (i) {
  j -> 1.0
  else -> 0.0
}

// Polynomial hash with multiplier 31 over the hashCode() of every argument, starting from 0.
// Cheap to compute, but not collision-free: different argument lists can produce the same
// Int (for example, the lists (1, 0) and (0, 31) both give 31), so the result is unreliable
// as a unique key.
fun hash(vararg ints: Any): Int {
  var result = 0
  for (element in ints) {
    result = 31 * result + element.hashCode()
  }
  return result
}
// Int overload of the polynomial hash: folds the values with multiplier 31, starting from 0.
// Collisions are possible, e.g. a leading zero does not change the result.
fun hash(vararg ints: Int): Int {
  var result = 0
  for (value in ints) {
    result = 31 * result + value
  }
  return result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ open class FSA(open val Q: TSA, open val init: Set<Σᐩ>, open val final: Set<
// Result of nominalize(), computed on first access and then cached by `lazy`.
val nominalForm: NOM by lazy { nominalize() }
// The states of Q, read lazily.
val states by lazy { Q.states }
// The same states materialised as a list.
val stateLst by lazy { states.toList() }
// NOTE(review): stateMap and APSP are each declared twice below. This excerpt looks like a diff
// rendering whose +/- markers were stripped; presumably the first stateMap, the Map<Int, Int>
// APSP header and the hash(...)-keyed Pair are the removed lines. Confirm against the repository.
// Maps every state to its index in iteration order.
val stateMap by lazy { states.toList().withIndex().associate { it.value to it.index } }
// Variant whose key is a single Int produced by hash(fromIndex, toIndex).
val APSP: Map<Int, Int> by lazy {
// Same mapping as above, indexing `states` directly instead of a list copy.
val stateMap by lazy { states.withIndex().associate { it.value to it.index } }
// Variant whose key is the (fromIndex, toIndex) pair itself, so two different pairs can never
// share a key.
val APSP: Map<Pair<Int, Int>, Int> by lazy {
// Re-keys each entry of graph.APSP by the stateMap indices of its two endpoint labels; the value
// v is carried over unchanged. The `!!` lookups throw if a label is missing from stateMap.
graph.APSP.map { (k, v) ->
Pair(hash(stateMap[k.first.label]!!, stateMap[k.second.label]!!), v)
// println("Hashing: ${k.first.label} -> ${k.second.label} == $v")
Pair(stateMap[k.first.label]!! to stateMap[k.second.label]!!, v)
}.toMap()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import ai.hypergraph.kaliningraph.repair.MAX_TOKENS
import ai.hypergraph.kaliningraph.types.*
import ai.hypergraph.kaliningraph.types.times
import kotlin.math.*
import kotlin.random.Random
import kotlin.time.TimeSource

/**
Expand Down Expand Up @@ -268,7 +269,8 @@ private fun manhattanDistance(first: Pair<Int, Int>, second: Pair<Int, Int>): In

// Range of the shortest path to the longest path, i.e., Manhattan distance
// Builds an IntRange whose lower bound is the APSP entry for (a.π1, b.π1) and whose upper bound
// is manhattanDistance(a.coords(), b.coords()).
// A pair that is absent from APSP falls back to Int.MAX_VALUE as the lower bound, which makes the
// range empty whenever the Manhattan distance is smaller than Int.MAX_VALUE.
// NOTE(review): two alternative lower-bound lines follow (hash-keyed, then Pair-keyed). This
// excerpt looks like a diff rendering with the +/- markers stripped; presumably only the
// Pair-keyed line is live. Confirm against the repository.
private fun FSA.SPLP(a: STC, b: STC) =
(APSP[hash(a.π1, b.π1)] ?: Int.MAX_VALUE)..
(APSP[a.π1 to b.π1] ?: Int.MAX_VALUE)..//.also { /*if (Random.nextInt(100000) == 3) if(it == Int.MAX_VALUE) println("Miss! ${hash(a.π1, b.π1)} / ${a.first} / ${b.first}") else */
// if (it != Int.MAX_VALUE) println("Hit: ${hash(a.π1, b.π1)} / ${a.first} / ${b.first}") }..
manhattanDistance(a.coords(), b.coords())

private fun IntRange.overlaps(other: IntRange) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import kotlin.time.*
// Mutable top-level tuning settings: every one is a `var`, so any caller can reassign it at
// runtime and the new value stays in effect until it is reassigned again.
// NOTE(review): the purpose of CFG_THRESH is not evident from this excerpt; confirm.
var CFG_THRESH = 20_000
var MAX_UNIQUE = 20_000 // Maximum number of unique samples to generate
var MAX_SAMPLE = 20 // Maximum number of repairs to sample
// NOTE(review): MAX_TOKENS appears twice (80, then 40). This excerpt looks like a diff rendering
// with the +/- markers stripped; presumably 40 is the current default. Confirm.
var MAX_TOKENS = 80 // Maximum number of tokens per repair
var MAX_TOKENS = 40 // Maximum number of tokens per repair
// NOTE(review): presumably the largest edit radius considered; confirm against its usages.
var MAX_RADIUS = 4
var TIMEOUT_MS = 90_000 // Timeout for each repair attempt (default, modify elsewhere)
var MAX_REPAIR = 2 // Maximum number of edits per repair
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG {
// we have the production [p,A,r] → [p,B,q] [q,C,r] in P′.
val prods = nonterminalProductions
.map { (a, b) -> ntMap[a]!! to b.map { ntMap[it]!! } }.toSet()
// val lengthBoundsCache = lengthBounds.let { lb -> ntLst.map { lb[it]!! } }
val lengthBoundsCache = lengthBounds.let { lb -> ntLst.map { lb[it] ?: 0..0 } }
val validTriples: List<Triple<STC, STC, STC>> = fsa.validTriples

val elimCounter = AtomicInteger(0)
Expand All @@ -180,7 +180,7 @@ private fun CFG.jvmIntersectLevFSAP(fsa: FSA, parikhMap: ParikhMap): CFG {
validTriples.stream()
// CFG ∩ FSA - in general we are not allowed to do this, but it works
// because we assume a Levenshtein FSA, which is monotone and acyclic.
// .filter { it.isCompatibleWith(A to B to C, fsa, lengthBoundsCache).also { if (!it) elimCounter.incrementAndGet() } }
.filter { it.isCompatibleWith(A to B to C, fsa, lengthBoundsCache).also { if (!it) elimCounter.incrementAndGet() } }
.filter { it.obeysLevenshteinParikhBounds(A to B to C, fsa, parikhMap).also { if (!it) elimCounter.incrementAndGet() } }
.map { (a, b, c) ->
if (MAX_PRODS < counter.incrementAndGet())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import ai.hypergraph.kaliningraph.tokenizeByWhitespace
import ai.hypergraph.markovian.*
import org.junit.jupiter.api.Test
import org.kosat.round
import java.util.stream.Collectors
import kotlin.random.Random
import kotlin.reflect.KFunction2
import kotlin.test.*
Expand Down Expand Up @@ -98,7 +99,32 @@ class ProbabilisticLBH {
.toSet()
}


/*
./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.repair.ProbabilisticLBH.twoEditRepair"
*/
/**
 * Checks that a known repair is contained in the grammar obtained by intersecting the Python
 * grammar with a Levenshtein automaton built around the broken source, and that sampling from
 * that grammar for at most 30 seconds actually produces the repair.
 *
 * Despite the name, the expected repair differs from the source in three tokens (each `=`
 * inside the braces becomes `:`), which is why the edit distance used here is 3.
 */
@Test
fun twoEditRepair() {
  val source = "NAME = { STRING = NUMBER , STRING = NUMBER , STRING = NUMBER } NEWLINE"
  val repair = "NAME = { STRING : NUMBER , STRING : NUMBER , STRING : NUMBER } NEWLINE"
  val gram = Grammars.seq2parsePythonCFG.noEpsilonOrNonterminalStubs
  val levDist = 3
  val sourceTokens = source.tokenizeByWhitespace()

  // MAX_TOKENS and MAX_RADIUS are global mutable settings. Remember the current values so the
  // overrides below do not leak into whichever test happens to run after this one.
  val savedMaxTokens = MAX_TOKENS
  val savedMaxRadius = MAX_RADIUS
  try {
    MAX_TOKENS = sourceTokens.size + 5
    MAX_RADIUS = levDist

    // Two separate preconditions, so a failure says which one was violated.
    assertTrue(repair in gram.language, "Expected repair is not in the grammar's language")
    assertTrue(levenshtein(source, repair) <= levDist, "Expected repair is more than $levDist edits away")

    // The clock starts before the automaton is built, so the 30-second sampling budget below
    // also covers construction of the intersection grammar.
    val clock = TimeSource.Monotonic.markNow()
    val levBall = makeLevFSA(sourceTokens, levDist)
    val intGram = gram.jvmIntersectLevFSA(levBall)
    println("Finished ${intGram.size}-prod ∩-grammar in ${clock.elapsedNow()}")
    val lbhSet = intGram.toPTree().sampleDirectlyWOR()
      .takeWhile { clock.elapsedNow().inWholeSeconds < 30 }.collect(Collectors.toSet())
    println("Sampled ${lbhSet.size} repairs using Levenshtein/Bar-Hillel in ${clock.elapsedNow()}")
    assertTrue(repair in intGram.language, "Expected repair is not in the intersection grammar's language")
    assertTrue(repair in lbhSet, "Expected repair was not sampled within the time budget")
  } finally {
    MAX_TOKENS = savedMaxTokens
    MAX_RADIUS = savedMaxRadius
  }
}

/*
./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.repair.ProbabilisticLBH.testInvalidLines"
*/
// @Test
Expand Down

0 comments on commit ff21a76

Please sign in to comment.