From 5a139fd73b18cc43ced9f13b9e3c29cf6f5798f8 Mon Sep 17 00:00:00 2001
From: breandan <bre@ndan.co>
Date: Thu, 12 Dec 2024 23:41:57 -0500
Subject: [PATCH] profiling and cleanup

---
 .../ai/hypergraph/kaliningraph/parsing/CFG.kt |  8 ++-
 .../kaliningraph/repair/Grammars.kt           |  2 +-
 .../hypergraph/kaliningraph/tensor/Tensor.kt  | 42 ++-----------
 .../kaliningraph/parsing/Grammars.kt          | 60 ++++++++++---------
 .../kaliningraph/parsing/SetValiantTest.kt    | 12 ++++
 5 files changed, 57 insertions(+), 67 deletions(-)

diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt
index 2a4099b4..28c33024 100644
--- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt
+++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/CFG.kt
@@ -64,8 +64,12 @@ val CFG.ntMap by cache { ntLst.mapIndexed { i, s -> s to i }.toMap() }
 // which is then flattened to a list of adjacent pairs of nonterminal indices
 val CFG.vindex: Array<IntArray> by cache {
   Array(bindex.indexedNTs.size) { i ->
-    bimap[bindex[i]].filter { it.size > 1 }
-      .flatMap { listOf(bindex[it[0]], bindex[it[1]]) }.toIntArray()
+//    val lhs = bindex[i]
+    bimap[bindex[i]].filter { it.size == 2 } // keep only two-symbol right-hand sides
+//      .map { it to -(PCFG3_BIFI[lhs to it[0] to it[1]] ?: 0).also { s -> println("$lhs -> ${it[0]} ${it[1]} ($s)" )} }
+//      .sortedBy { it.second }.map { it.first }
+      .flatMap { rhs -> rhs.map { sym -> bindex[sym] } } // symbols -> indices, concatenated in order
+      .toIntArray()
   }
 }
 
diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/repair/Grammars.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/repair/Grammars.kt
index 21f5706a..fe821b72 100644
--- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/repair/Grammars.kt
+++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/repair/Grammars.kt
@@ -5,7 +5,7 @@ import ai.hypergraph.kaliningraph.parsing.noEpsilonOrNonterminalStubs
 import ai.hypergraph.kaliningraph.parsing.noNonterminalStubs
 import ai.hypergraph.kaliningraph.parsing.parseCFG
 
-val s2pCFGStr =   """
+val s2pCFGStr = """
 START -> Stmts_Or_Newlines
 Stmts_Or_Newlines -> Stmt_Or_Newline | Stmt_Or_Newline Stmts_Or_Newlines
 Stmt_Or_Newline -> Stmt | Newline
diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/tensor/Tensor.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/tensor/Tensor.kt
index 1d8fdc3b..048af5c0 100644
--- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/tensor/Tensor.kt
+++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/tensor/Tensor.kt
@@ -339,8 +339,9 @@ operator fun DoubleMatrix.times(value: Double): DoubleMatrix =
   DoubleMatrix(numRows, numCols, data.map { it * value })
 
 // TODO: Rewrite this from scratch using T: List<UTMatrix<T>> recursive type with overlapping trees
-// Diagonals of a strictly-UT matrix for DAG-based dynamic programming
-class UTMatrix<T> constructor(
+// Diagonals of a strictly upper triangular matrix for DAG-based dynamic programming
+// All entries on and below the main diagonal are zero
+open class UTMatrix<T> constructor(
   val diagonals: List<List<T>>, // List of strictly-UT diagonals from longest to shortest
   override val algebra: Ring<T>
 ): AbstractMatrix<T, Ring<T>, UTMatrix<T>>(algebra, diagonals.first().size + 1) {
@@ -382,7 +383,9 @@ class UTMatrix<T> constructor(
 
   fun squared() = toFullMatrix().squareUpperTriangular().toUTMatrix()
 
-  fun seekFixpoint(
+  // Repeatedly performs matrix-matrix multiplication until a fixpoint is reached.
+  // Each step fills in the next diagonal, up to the last (shortest) upper diagonal.
+  open fun seekFixpoint(
     // Carries a triple of:
     //    (1) the element itself,
     //    (2) row to an element's left (inclusive)
@@ -411,39 +414,6 @@ class UTMatrix<T> constructor(
       ).seekFixpoint(next, iteration + 1, maxIterations)
     }
 
-  fun seekFixpointFast(maxIterations: Int = diagonals.first().size): UTMatrix<T> {
-    var iteration = 0
-
-    val diagonalsMutable = diagonals.toMutableList()
-    val carry = diagonals.last().map { it to mutableListOf(it) to mutableListOf(it) }.toMutableList()
-
-    while (iteration < maxIterations && diagonalsMutable.last().size != 1) {
-      val next = mutableListOf<Triple<T, MutableList<T>, MutableList<T>>>()
-
-      for (i in 1 until carry.size) {
-        var acc = algebra.nil
-        for (j in carry[i - 1].second.indices) {
-          acc = with(algebra) { acc + (carry[i - 1].second[j] * carry[i].third[j]) }
-        }
-
-        val left = carry[i - 1].second.apply { add(acc) }
-        val right = carry[i].third.apply { add(0, acc) }
-
-        next.add(Triple(acc, left, right))
-      }
-
-      diagonalsMutable += next.map { it.first }
-      carry.clear()
-      carry.addAll(next)
-      iteration++
-    }
-
-    return UTMatrix(
-      diagonals = diagonalsMutable,
-      algebra = algebra
-    )
-  }
-
   // Offsets diagonals by one when converting back to matrix (superdiagonal)
   fun toFullMatrix() =
     if (diagonals.last().size != 1)
diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt
index 2e80768b..58ee134e 100644
--- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt
+++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/Grammars.kt
@@ -1,10 +1,13 @@
 import ai.hypergraph.kaliningraph.parsing.*
 
 object Grammars {
-  val sss = """START -> b | START | START START | START START START"""
-    .parseCFG().noNonterminalStubs
+  val sss by lazy {
+    """START -> b | START | START START | START START START"""
+      .parseCFG().noNonterminalStubs
+  }
 
-  val ifThen = """
+  val ifThen by lazy {
+    """
      START -> X
      X -> I | F | P | Q
      P -> I O I | P O I
@@ -18,33 +21,34 @@ object Grammars {
     BO -> and | or | xor | nand
      N -> !
   """.parseCFG().noNonterminalStubs
+  }
 
-  val toyArith = """
+  val toyArith by lazy { """
     S -> S + S | S * S | S - S | S / S | ( S ) | - S
     S -> 0 | 1 | 2 | 3 | 4
     S -> X | Y | Z
-  """.parseCFG().noNonterminalStubs
+  """.parseCFG().noNonterminalStubs }
 
-  val dyckUnambig = """S -> ( S ) S | ( S ) | ( ) S | ( )""".parseCFG().noEpsilonOrNonterminalStubs
-  val dyck = """S -> ( S ) | ( ) | S S""".parseCFG().noEpsilonOrNonterminalStubs
+  val dyckUnambig by lazy { """S -> ( S ) S | ( S ) | ( ) S | ( )""".parseCFG().noEpsilonOrNonterminalStubs }
+  val dyck by lazy { """S -> ( S ) | ( ) | S S""".parseCFG().noEpsilonOrNonterminalStubs }
 
-  val dyckEmbedded = """
+  val dyckEmbedded by lazy { """
     START -> ( ) | ( START ) | START START
     START -> START + START | START * START
     START -> 1
-  """.parseCFG().noNonterminalStubs
+  """.parseCFG().noNonterminalStubs }
 
-  val deadSimple = """S -> ( ) | ( S )""".parseCFG().noEpsilonOrNonterminalStubs
-  val dsNorm = """
+  val deadSimple by lazy { """S -> ( ) | ( S )""".parseCFG().noEpsilonOrNonterminalStubs }
+  val dsNorm by lazy { """
     START -> START START
     START -> A B
     START -> A C
     A -> (
     B -> )
     C -> START B
-  """.parseCFG().noEpsilonOrNonterminalStubs
+  """.parseCFG().noEpsilonOrNonterminalStubs }
 
-  val ocamlCFG = """
+  val ocamlCFG by lazy { """
     S -> X
     X -> A | V | ( X , X ) | X X | ( X )
     A -> FUN | F | LI | M | L
@@ -69,9 +73,9 @@ object Grammars {
     VO -> = | < | `||` | `&&`
     I -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
     B ->  true | false
-  """.parseCFG().noNonterminalStubs
+  """.parseCFG().noNonterminalStubs }
 
-  val coarsenedPythonCFG = """
+  val coarsenedPythonCFG by lazy { """
     S -> w | S ( S ) | ( ) | S = S | S . S | S S | ( S ) | [ S ] | { S } | : | * S | [ ]
     S -> S , S | S ; S | S : S
     S -> S IOP S | S BOP S
@@ -86,9 +90,9 @@ object Grammars {
     S -> if S | if S else S | return S
     S -> not S | S or S
     S -> lambda w : S | lambda w , w : S | lambda w , w , w : S | lambda w , w , w , w : S
-  """.parseCFG().noNonterminalStubs
+  """.parseCFG().noNonterminalStubs }
 
-  val tinyC: CFG = """
+  val tinyC: CFG by lazy { """
     START -> program
     program -> statement
     statement -> if paren_expr statement
@@ -103,10 +107,10 @@ object Grammars {
     test -> sum | sum < sum
     sum -> term | sum + term | sum - term
     term -> id | int | paren_expr
-  """.parseCFG().freeze()
+  """.parseCFG().freeze() }
 
 //  https://aclanthology.org/2020.conll-1.41.pdf#page=12
-  val hardestCFL: CFG = """
+  val hardestCFL: CFG by lazy { """
     S' -> R ${'$'} Q S L ;
     L -> L' , U
     L' -> , V L'
@@ -131,10 +135,10 @@ object Grammars {
     T -> [ Q S Q ]
     T -> ( Q )
     T -> [ Q ]
-  """.trimIndent().parseCFG().noNonterminalStubs
+  """.trimIndent().parseCFG().noNonterminalStubs }
 
   val shortS2PParikhMap by lazy { ParikhMap(seq2parsePythonCFG, 20) }
-  val seq2parsePythonCFGStr = """
+  val seq2parsePythonCFGStr by lazy { """
     START -> Stmts_Or_Newlines
     Stmts_Or_Newlines -> Stmt_Or_Newline | Stmt_Or_Newline Stmts_Or_Newlines
     Stmt_Or_Newline -> Stmt | Newline
@@ -325,12 +329,12 @@ object Grammars {
 
     Yield_Expr -> Yield_Keyword | Yield_Keyword Yield_Arg
     Yield_Arg -> From_Keyword Test | Testlist_Endcomma 
-  """
+  """ }
 
-  val seq2parsePythonCFG: CFG = seq2parsePythonCFGStr.parseCFG().noNonterminalStubs
-  val seq2parsePythonVanillaCFG: CFG = seq2parsePythonCFGStr.parseCFG().noEpsilonOrNonterminalStubs
+  val seq2parsePythonCFG: CFG by lazy { seq2parsePythonCFGStr.parseCFG().noNonterminalStubs }
+  val seq2parsePythonVanillaCFG: CFG by lazy { seq2parsePythonCFGStr.parseCFG().noEpsilonOrNonterminalStubs }
 
-  val checkedArithCFG = """
+  val checkedArithCFG by lazy { """
     START -> S
 S -> S1 = S1
 S -> S2 = S2
@@ -381,13 +385,13 @@ P6 -> P6 / P1
 P7 -> P7 / P1
 P8 -> P8 / P1
 P9 -> P9 / P1
-  """.parseCFG().noNonterminalStubs.freeze()
+  """.parseCFG().noNonterminalStubs.freeze() }
 
-  val arith = """
+  val arith by lazy { """
     O -> + | * | - | /
     S -> S O S | ( S )
     S -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
-  """.parseCFG()
+  """.parseCFG() }
 
   private fun Tree.middle(): Σᐩ? = children.drop(1).firstOrNull()?.terminal
   fun Tree.evalArith(): Int = when {
diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt
index 2ca00323..acf3a149 100644
--- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt
+++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt
@@ -15,6 +15,18 @@ import kotlin.time.*
 ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.SetValiantTest"
 */
 class SetValiantTest {
+/*
+./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.SetValiantTest.testStressRecognizer"
+*/
+  @Test
+  fun testStressRecognizer() {
+    val g = Grammars.seq2parsePythonVanillaCFG
+    g.sliceSample(20).take(10000).forEach { sample -> // NOTE(review): negative check assumes each sample ends in NEWLINE/DEDENT — confirm
+      assertTrue(sample.matches(g)) // every sampled string must be recognized by g
+      assertFalse(sample.tokenizeByWhitespace().dropLastWhile { tok -> tok == "DEDENT" || tok == "NEWLINE" }.matches(g))
+    }
+  }
+
 /*
 ./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.parsing.SetValiantTest.testSimpleGrammar"
 */