Skip to content

Commit

Permalink
tweak default parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
jli05 committed Jun 4, 2018
1 parent 42b1169 commit 44459c4
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 7 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ organization := "megadata"

name := "SpectralLDA-Tensor"

version := "1.3.0.rc12"
version := "1.3.0.rc13"

scalaVersion := "2.11.12"

Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/megadata/spectralLDA/algorithm/RandNLA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ object RandNLA {
numDocs: Long,
firstOrderMoments: DenseVector[Double],
documents: RDD[(Long, Double, SparseVector[Double])],
nIter: Int = 1)
nIter: Int)
(implicit randBasis: RandBasis = Rand)
: (DenseMatrix[Double], DenseVector[Double]) = {
assert(vocabSize >= dimK)
Expand All @@ -55,7 +55,7 @@ object RandNLA {
// Universality laws for randomized dimension reduction
// with applications, S. Oymak and J. A. Tropp. Inform. Inference, Nov. 2017
// Theorem II on Restricted Minimum Singular Value
val projectedDim = math.pow(dimK, 1.1).toInt
val projectedDim = math.pow(dimK, 1.15).toInt

// Cache some data
val extDocs = documents
Expand Down
7 changes: 5 additions & 2 deletions src/main/scala/megadata/spectralLDA/algorithm/TensorLDA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ import org.apache.log4j.Logger
* true by default
* @param numIterationsKrylovMethod iterations of the Krylov Method for
* randomised SVD, 1 by default
* @param slackDimK extra number of orthogonal topic
* fragments to discover in order to
* compose k topics
* @param postProcessing post-processing topic-word distribution
* matrix by projection into l1-simplex,
* false by default
Expand All @@ -36,7 +39,7 @@ class TensorLDA(dimK: Int,
maxIterations: Int = 500,
tol: Double = 1e-6,
randomisedSVD: Boolean = true,
numIterationsKrylovMethod: Int = 2,
numIterationsKrylovMethod: Int = 1,
slackDimK: Option[Int] = None,
postProcessing: Boolean = false) extends Serializable {
@transient private lazy val logger = Logger.getLogger("TensorLDA")
Expand All @@ -48,7 +51,7 @@ class TensorLDA(dimK: Int,
// As one final discovered topic could be the combination of multiple
// eigenvectors of M2, we allow redundancy in the number of eigenvectors
// we compute for M2.
val slackK = slackDimK.getOrElse(dimK / 2)
val slackK = slackDimK.getOrElse(dimK)
logger.info(s"Slack of random projection dimension: $slackK")

assert(alpha0 > 0, "The topic concentration alpha0 must be positive.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ object DataCumulant {
alpha0: Double,
documents: RDD[(Long, SparseVector[Double])],
randomisedSVD: Boolean = true,
numIterationsKrylovMethod: Int = 1)
numIterationsKrylovMethod: Int)
(implicit randBasis: RandBasis = Rand)
: DataCumulant = {
assert(dimK > 0, "The number of topics dimK must be positive.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ class DataCumulantTest extends FlatSpec with Matchers {
dimK = 3,
alpha0 = sum(alpha),
documentsRDD,
randomisedSVD = false
randomisedSVD = false,
numIterationsKrylovMethod = 0
)

val expectedM3: DenseMatrix[Double] = expected_whitened_M3(
Expand Down

0 comments on commit 44459c4

Please sign in to comment.