More refactoring, #8 #15
vsuthichai committed Aug 3, 2016
1 parent e4bf1e7 commit c5e152a
Showing 30 changed files with 336 additions and 198 deletions.
10 changes: 10 additions & 0 deletions core/src/main/scala/com/eharmony/spotz/Preamble.scala
@@ -1,12 +1,22 @@
package com.eharmony.spotz

import com.eharmony.spotz.optimizer.SamplerFunction

import scala.math.Ordering
import scala.language.implicitConversions

/**
* @author vsuthichai
*/
object Preamble {
// type SamplerFunctionOrIterable = SamplerFunction[_] with Iterable[_]

type neg[A] = A => Nothing
type union[T, U] = neg[neg[T] with neg[U]]
type doubleNeg[A] = neg[neg[A]]

type SamplerFunctionOrIterable = union[SamplerFunction[_], Iterable[_]]

implicit object PointLossOrdering extends Ordering[(Point, Double)] {
override def compare(x: (Point, Double), y: (Point, Double)): Int = {
if (x._2 > y._2) 1
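A note on the union-type encoding added to Preamble above: neg, union, and doubleNeg are the usual Curry-Howard trick for expressing "A or B" as a bound the compiler can check. A minimal sketch of how SamplerFunctionOrIterable might be used as an evidence bound follows; the describe helper and UnionTypeSketch object are hypothetical, not part of the commit.

import com.eharmony.spotz.Preamble._
import com.eharmony.spotz.optimizer.SamplerFunction

object UnionTypeSketch {
  // Hypothetical helper: compiles only when T is a SamplerFunction or an Iterable.
  def describe[T](value: T)(implicit ev: doubleNeg[T] <:< SamplerFunctionOrIterable): String =
    value match {
      case s: SamplerFunction[_] => s"sampler function: $s"
      case i: Iterable[_]        => s"iterable with ${i.size} elements"
    }

  describe(Seq(1, 2, 3))      // compiles: Seq is an Iterable
  // describe(42)             // rejected: Int is neither a SamplerFunction nor an Iterable
}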
@@ -9,14 +9,14 @@ import scala.reflect.ClassTag
* @author vsuthichai
*/
trait BackendFunctions {
def bestRandomPoint[P, L](startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
space: RandomSpace[P],
reducer: ((P, L), (P, L)) => (P, L)): (P, L)
protected def bestRandomPoint[P, L](startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
space: RandomSpace[P],
reducer: ((P, L), (P, L)) => (P, L)): (P, L)

def bestPointAndLoss[P, L](gridPoints: Seq[P],
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L))
(implicit c: ClassTag[P], p: ClassTag[L]): (P, L)
protected def bestPointAndLoss[P, L](gridPoints: Seq[P],
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L))
(implicit c: ClassTag[P], p: ClassTag[L]): (P, L)
}
@@ -9,11 +9,11 @@ import scala.reflect.ClassTag
* @author vsuthichai
*/
trait ParallelFunctions extends BackendFunctions {
override def bestRandomPoint[P, L](startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
space: RandomSpace[P],
reducer: ((P, L), (P, L)) => (P, L)): (P, L) = {
protected override def bestRandomPoint[P, L](startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
space: RandomSpace[P],
reducer: ((P, L), (P, L)) => (P, L)): (P, L) = {
val pointsAndLosses = (startIndex to (startIndex + batchSize)).par.map { trial =>
val rngModifiedSpace = space.setSeed(space.seed + trial)
val point = rngModifiedSpace.sample
@@ -23,10 +23,10 @@ trait ParallelFunctions extends BackendFunctions {
pointsAndLosses.reduce(reducer)
}

override def bestPointAndLoss[P, L](gridPoints: Seq[P],
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L))
(implicit c: ClassTag[P], p: ClassTag[L]): (P, L) = {
protected override def bestPointAndLoss[P, L](gridPoints: Seq[P],
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L))
(implicit c: ClassTag[P], p: ClassTag[L]): (P, L) = {
gridPoints.par.map(point => (point, objective(point))).reduce(reducer)
}
}
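Because bestRandomPoint and bestPointAndLoss are now protected, they can only be driven from inside a subtype. A rough sketch of what that looks like; the ParallelSketch object, the toy objective, and the reducer are illustrative only, and Objective's apply signature (apply(point) returning the loss) is an assumption.

import com.eharmony.spotz.backend.ParallelFunctions
import com.eharmony.spotz.objective.Objective

// Illustrative driver: the protected helpers are reachable only from a subtype.
object ParallelSketch extends ParallelFunctions {
  def run(): (Int, Double) = {
    // Assumed Objective contract: apply(point) returns the loss.
    val objective = new Objective[Int, Double] {
      override def apply(point: Int): Double = (point - 3.0) * (point - 3.0)
    }
    // Keep whichever (point, loss) pair has the smaller loss.
    val minReducer: ((Int, Double), (Int, Double)) => (Int, Double) =
      (a, b) => if (a._2 <= b._2) a else b

    bestPointAndLoss(Seq(1, 2, 3, 4, 5), objective, minReducer)  // expected (3, 0.0)
  }
}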
@@ -37,7 +37,7 @@ trait SparkFunctions extends BackendFunctions {
* generated
* @return the best point with the best loss
*/
override def bestRandomPoint[P, L](startIndex: Long,
protected[backend] override def bestRandomPoint[P, L](startIndex: Long,
batchSize: Long,
objective: Objective[P, L],
space: RandomSpace[P],
@@ -58,7 +58,7 @@
pointAndLossRDD.reduce(reducer)
}

override def bestPointAndLoss[P, L](gridPoints: Seq[P],
protected[backend] override def bestPointAndLoss[P, L](gridPoints: Seq[P],
objective: Objective[P, L],
reducer: ((P, L), (P, L)) => (P, L))
(implicit c: ClassTag[P], p: ClassTag[L]): (P, L) = {
@@ -5,16 +5,14 @@ import scala.util.Random
/**
* @author vsuthichai
*/
trait SamplerFunction[T] extends Serializable {
def apply(rng: Random): T
}

trait RandomSampler[T] extends SamplerFunction[T]
trait SamplerFunction[T] extends Serializable

abstract class Uniform[T](lb: T, ub: T) extends RandomSampler[T] {
abstract class RandomSampler[T] extends SamplerFunction[T] {
def apply(rng: Random): T
}

abstract class Uniform[T](lb: T, ub: T) extends RandomSampler[T]

case class UniformDouble(lb: Double, ub: Double) extends Uniform[Double](lb, ub) {
override def apply(rng: Random): Double = lb + ((ub - lb) * rng.nextDouble)
}
@@ -23,7 +21,13 @@ case class UniformInt(lb: Int, ub: Int) extends Uniform[Int](lb, ub) {
override def apply(rng: Random): Int = lb + rng.nextInt(ub - lb)
}

case class IterableSampler[T](iterable: Iterable[T]) extends RandomSampler[T] {
case class RandomChoice[T](iterable: Iterable[T]) extends RandomSampler[T] {
val values = iterable.toSeq

override def apply(rng: Random): T = values(rng.nextInt(values.length))
}

case class Grid[T](iterable: Iterable[T]) extends RandomSampler[T] {
val values = iterable.toSeq

override def apply(rng: Random): T = values(rng.nextInt(values.length))
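The renamed samplers are plain functions of a scala.util.Random, so they can be exercised directly. A quick sketch, assuming the sampler case classes live in com.eharmony.spotz.optimizer alongside SamplerFunction (the parameter names are arbitrary):

import scala.util.Random
import com.eharmony.spotz.optimizer.{RandomChoice, UniformDouble, UniformInt}

object SamplerSketch extends App {
  val rng = new Random(42)

  val learningRate = UniformDouble(0.001, 0.1)(rng)           // a Double drawn from [0.001, 0.1)
  val maxDepth     = UniformInt(2, 10)(rng)                    // an Int drawn from [2, 10)
  val kernel       = RandomChoice(Seq("rbf", "linear"))(rng)   // one of the listed choices

  println(s"$learningRate $maxDepth $kernel")
}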
36 changes: 25 additions & 11 deletions core/src/main/scala/com/eharmony/spotz/optimizer/Optimizer.scala
@@ -8,17 +8,20 @@ import scala.reflect.ClassTag
/**
* @author vsuthichai
*/
trait Optimizer[P, L, -S <: Space[P], +R <: OptimizerResult[P, L]] extends Serializable {
def minimize(objective: Objective[P, L], space: S)(implicit c: ClassTag[P], p: ClassTag[L]): R
def maximize(objective: Objective[P, L], space: S)(implicit c: ClassTag[P], p: ClassTag[L]): R
trait Optimizer[P, L, +R] extends Serializable {
def minimize(objective: Objective[P, L])(implicit c: ClassTag[P], p: ClassTag[L]): R
def maximize(objective: Objective[P, L])(implicit c: ClassTag[P], p: ClassTag[L]): R
}

/*
trait AbstractOptimizer[P, L, -S <: Space[P], +R <: OptimizerResult[P, L]] extends Optimizer[P, L, S, R] {
type Reducer[T] = (T, T) => T
implicit val ord: Ordering[(P, L)]
def min(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.min(p1, p2)
def max(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.max(p1, p2)
protected def min(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.min(p1, p2)
protected def max(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.max(p1, p2)
protected def optimize(objective: Objective[P, L], space: S, reducer: Reducer[(P, L)])
(implicit c: ClassTag[P], p: ClassTag[L]): R
override def minimize(objective: Objective[P, L], space: S)(implicit c: ClassTag[P], p: ClassTag[L]): R = {
optimize(objective, space, min)
@@ -27,14 +30,25 @@ trait AbstractOptimizer[P, L, -S <: Space[P], +R <: OptimizerResult[P, L]] exten
override def maximize(objective: Objective[P, L], space: S)(implicit c: ClassTag[P], p: ClassTag[L]): R = {
optimize(objective, space, max)
}
}
*/

protected def optimize(objective: Objective[P, L], space: S, reducer: Reducer[(P, L)])
trait AbstractOptimizer[P, L, R <: OptimizerResult[P, L]] extends Optimizer[P, L, R] {
type Reducer[T] = (T, T) => T
implicit val ord: Ordering[(P, L)]

protected def min(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.min(p1, p2)
protected def max(p1: (P,L), p2: (P,L))(implicit ord: Ordering[(P,L)]): (P,L) = ord.max(p1, p2)
protected def optimize(objective: Objective[P, L], reducer: Reducer[(P, L)])
(implicit c: ClassTag[P], p: ClassTag[L]): R
}

abstract class OptimizerResult[P, L](bestPoint: P, bestLoss: L)
override def minimize(objective: Objective[P, L])(implicit c: ClassTag[P], p: ClassTag[L]): R = {
optimize(objective, min)
}

trait Space[P] extends Serializable {
def sample: P
def sample(howMany: Int): Iterable[P] = Seq.fill(howMany)(sample)
override def maximize(objective: Objective[P, L])(implicit c: ClassTag[P], p: ClassTag[L]): R = {
optimize(objective, max)
}
}

abstract class OptimizerResult[P, L](bestPoint: P, bestLoss: L)
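To see what the slimmed-down contract asks of an implementor, here is a deliberately trivial optimizer sketched against the new AbstractOptimizer. Everything named here is hypothetical, and Objective's apply signature is assumed; the point is just the three obligations: an Ordering[(P, L)], a result type, and a protected optimize.

import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.{AbstractOptimizer, OptimizerResult}

import scala.reflect.ClassTag

// Hypothetical result type carrying just the winning pair.
class SingleResult[P, L](bestPoint: P, bestLoss: L)
  extends OptimizerResult[P, L](bestPoint, bestLoss)

// Hypothetical optimizer that evaluates exactly one fixed point.
class SinglePointOptimizer[P, L](point: P)(implicit val ord: Ordering[(P, L)])
  extends AbstractOptimizer[P, L, SingleResult[P, L]] {

  override protected def optimize(objective: Objective[P, L], reducer: Reducer[(P, L)])
                                 (implicit c: ClassTag[P], p: ClassTag[L]): SingleResult[P, L] =
    new SingleResult(point, objective(point))
}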
@@ -0,0 +1,13 @@
package com.eharmony.spotz.optimizer

/**
* Created by vsuthichai on 8/2/16.
*/
object OptimizerConstants {
val RANDOM_SEARCH = "random"
val GRID_SEARCH = "grid"


val SPARK_BACKEND = "spark"
val THREADS_BACKEND = "threads"
}
9 changes: 9 additions & 0 deletions core/src/main/scala/com/eharmony/spotz/optimizer/Space.scala
@@ -0,0 +1,9 @@
package com.eharmony.spotz.optimizer

/**
* @author vsuthichai
*/
trait Space[P] extends Serializable {
def sample: P
def sample(howMany: Int): Iterable[P] = Seq.fill(howMany)(sample)
}
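The Space trait extracted into its own file above is small; any source of points will do. A hypothetical in-memory implementation, just to show the contract:

import scala.util.Random
import com.eharmony.spotz.optimizer.Space

// Hypothetical Space backed by a fixed pool of candidate points.
class SeqSpace[P](points: IndexedSeq[P], seed: Long = 0L) extends Space[P] {
  private val rng = new Random(seed)
  override def sample: P = points(rng.nextInt(points.length))
}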
@@ -14,6 +14,9 @@ sealed trait StopStrategy extends Serializable {
def shouldStop(trialsSoFar: Long, timeSinceFirstTrial: Duration): Boolean
}

// TODO
case class StopContext[P, L](foo: Any)

class MaxTrialsStop(maxTrials: Long) extends StopStrategy {
assert(maxTrials > 0, "Must specify greater than 0 trials.")
override def getMaxTrials: Long = maxTrials
@@ -38,13 +41,22 @@ class MaxTrialsOrMaxDurationStop(maxTrials: Long, maxDuration: Duration) extends
}
}

object OptimizerFinishes extends StopStrategy {
override def shouldStop(trialsSoFar: Long, durationSinceFirstTrial: Duration): Boolean = false
}

class StopStrategyPredicate[P, L](f: (StopContext[P, L]) => Boolean) {
def shouldStop(stopContext: StopContext[P, L]) = f(stopContext)
}

/**
* Companion factory object to instantiate various stop strategies.
*/
* Companion factory object to instantiate various stop strategies.
*/
object StopStrategy {
def stopAfterMaxDuration(maxDuration: Duration): TimedStop = new TimedStop(maxDuration)
def stopAfterMaxTrials(maxTrials: Long): MaxTrialsStop = new MaxTrialsStop(maxTrials)
def stopAfterMaxTrialsOrMaxDuration(maxTrials: Long, maxDuration: Duration): MaxTrialsOrMaxDurationStop = {
def stopAfterMaxDuration(maxDuration: Duration): StopStrategy = new TimedStop(maxDuration)
def stopAfterMaxTrials(maxTrials: Long): StopStrategy = new MaxTrialsStop(maxTrials)
def stopAfterMaxTrialsOrMaxDuration(maxTrials: Long, maxDuration: Duration): StopStrategy = {
new MaxTrialsOrMaxDurationStop(maxTrials, maxDuration)
}
def stopWhenOptimizerFinishes: StopStrategy = OptimizerFinishes
}
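Note that the factory methods now return the base StopStrategy type rather than the concrete classes. A short usage sketch, with the StopStrategy package assumed and arbitrary bounds:

import org.joda.time.Duration
import com.eharmony.spotz.optimizer.StopStrategy  // package assumed

object StopStrategySketch extends App {
  val stop: StopStrategy =
    StopStrategy.stopAfterMaxTrialsOrMaxDuration(10000, Duration.standardMinutes(30))

  // Neither bound has been reached yet, so this should report false.
  println(stop.shouldStop(2500, Duration.standardMinutes(5)))

  // Never stops on its own; the optimizer decides when it is done.
  val never: StopStrategy = StopStrategy.stopWhenOptimizerFinishes
  println(never.shouldStop(Long.MaxValue, Duration.standardDays(365)))  // false
}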
@@ -3,6 +3,7 @@ package com.eharmony.spotz.optimizer.grid
import com.eharmony.spotz.backend.{BackendFunctions, ParallelFunctions, SparkFunctions}
import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.AbstractOptimizer
import com.eharmony.spotz.util.{DurationUtils, Logger}
import org.apache.spark.SparkContext
import org.joda.time.{DateTime, Duration}

@@ -14,16 +15,27 @@ import scala.reflect.ClassTag
* @author vsuthichai
*/
abstract class GridSearch[P, L]
(trialBatchSize: Int = 100)
(implicit val ord: Ordering[(P, L)])
extends AbstractOptimizer[P, L, GridSpace[P], GridSearchResult[P, L]]
(paramSpace: Map[String, Iterable[_]], trialBatchSize: Int)
(implicit ord: Ordering[(P, L)], factory: Map[String, _] => P)
extends AbstractOptimizer[P, L, GridSearchResult[P, L]]
with BackendFunctions {

override def optimize(objective: Objective[P, L],
space: GridSpace[P],
reducer: Reducer[(P, L)])
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {
val LOG = Logger[this.type]()

def minimize(objective: Objective[P, L], space: Map[String, Iterable[_]])
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {
optimize(objective, min)
}

def maximize(objective: Objective[P, L], space: Map[String, Iterable[_]])
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {
optimize(objective, max)
}

override protected def optimize(objective: Objective[P, L],
reducer: Reducer[(P, L)])
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {
val space = new GridSpace[P](paramSpace)
val startTime = DateTime.now()
val firstPoint = space.sample
val firstLoss = objective(firstPoint)
@@ -33,26 +45,31 @@ abstract class GridSearch[P, L]
}

@tailrec
private[this] def gridSearch(objective: Objective[P, L], space: GridSpace[P], reducer: Reducer[(P, L)],
startTime: DateTime, bestPointSoFar: P, bestLossSoFar: L, trialsSoFar: Long)
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {
private def gridSearch(objective: Objective[P, L], space: GridSpace[P], reducer: Reducer[(P, L)],
startTime: DateTime, bestPointSoFar: P, bestLossSoFar: L, trialsSoFar: Long)
(implicit c: ClassTag[P], p: ClassTag[L]): GridSearchResult[P, L] = {

val endTime = DateTime.now()
val elapsedTime = new Duration(startTime, endTime)

LOG.info(s"Best point and loss after $trialsSoFar trials and ${DurationUtils.format(elapsedTime)} : $bestPointSoFar loss: $bestLossSoFar")

space.isExhausted match {
case true =>
GridSearchResult(bestPointSoFar, bestLossSoFar, startTime, endTime, trialsSoFar, elapsedTime)
case false =>
val points = space.sample(trialBatchSize)
val (bestPoint, bestLoss) = reducer((bestPointSoFar, bestLossSoFar), bestPointAndLoss(points.toSeq, objective, reducer))
gridSearch(objective, space, reducer, startTime, bestPoint, bestLoss, trialsSoFar)

gridSearch(objective, space, reducer, startTime, bestPoint, bestLoss, trialsSoFar + points.size)
}
}
}

class ParGridSearch[P, L](trialBatchSize: Int = 100)(implicit ord: Ordering[(P, L)])
extends GridSearch[P, L](trialBatchSize)(ord) with ParallelFunctions
class ParGridSearch[P, L](paramSpace: Map[String, Iterable[_]], trialBatchSize: Int = 100000)
(implicit val ord: Ordering[(P, L)], factory: Map[String, _] => P)
extends GridSearch[P, L](paramSpace, trialBatchSize)(ord, factory) with ParallelFunctions

class SparkGridSearch[P, L](@transient val sc: SparkContext, trialBatchSize: Int = 100)(implicit ord: Ordering[(P, L)])
extends GridSearch[P, L](trialBatchSize)(ord) with SparkFunctions
class SparkGridSearch[P, L](@transient val sc: SparkContext, paramSpace: Map[String, Iterable[_]], trialBatchSize: Int = 100000)
(implicit val ord: Ordering[(P, L)], factory: Map[String, _] => P)
extends GridSearch[P, L](paramSpace, trialBatchSize)(ord, factory) with SparkFunctions
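With the parameter space moved into the constructor and the point factory taken implicitly, driving the grid search end to end might look roughly like this. The Params type, its factory, and Objective's apply signature are assumptions, and the diff does not show how GridSpace consumes the factory; the real Preamble may supply its own Point factory instead.

import com.eharmony.spotz.objective.Objective
import com.eharmony.spotz.optimizer.grid.ParGridSearch

object GridSearchSketch extends App {
  // Hypothetical point type and factory.
  case class Params(alpha: Double, lambda: Double)

  implicit val factory: Map[String, _] => Params =
    m => Params(m("alpha").asInstanceOf[Double], m("lambda").asInstanceOf[Double])

  // Order candidate (point, loss) pairs by loss.
  implicit val ord: Ordering[(Params, Double)] = Ordering.by[(Params, Double), Double](_._2)

  // Assumed Objective contract: apply(point) returns the loss.
  val objective = new Objective[Params, Double] {
    override def apply(p: Params): Double = math.pow(p.alpha - 0.05, 2) + p.lambda
  }

  val paramSpace: Map[String, Iterable[_]] = Map(
    "alpha"  -> Seq(0.01, 0.05, 0.1),
    "lambda" -> Seq(0.0, 0.5, 1.0)
  )

  val result = new ParGridSearch[Params, Double](paramSpace).minimize(objective)
  println(result)
}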
@@ -1,7 +1,7 @@
package com.eharmony.spotz.optimizer.grid

import com.eharmony.spotz.optimizer.OptimizerResult
import org.joda.time.format.PeriodFormatterBuilder
import com.eharmony.spotz.util.DurationUtils
import org.joda.time.{DateTime, Duration}

/**
@@ -17,15 +17,7 @@ case class GridSearchResult[P, L](
extends OptimizerResult[P, L](bestPoint, bestLoss) {

override def toString = {
val formatter = new PeriodFormatterBuilder()
.appendDays().appendSuffix("d")
.appendHours().appendSuffix("h")
.appendMinutes().appendSuffix("m")
.appendSeconds().appendSuffix("s")
.appendMillis().appendSuffix("ms")
.toFormatter

s"GridSearchResult(bestPoint=$bestPoint, bestLoss=$bestLoss, " +
s"totalTrials=$totalTrials, duration=${formatter.print(elapsedTime.toPeriod)}"
s"totalTrials=$totalTrials, duration=${DurationUtils.format(elapsedTime)}"
}
}
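DurationUtils.format itself is not shown in this diff; a plausible implementation, reusing the PeriodFormatterBuilder chain that this commit removes from GridSearchResult, would be:

import org.joda.time.Duration
import org.joda.time.format.PeriodFormatterBuilder

object DurationUtils {
  private val formatter = new PeriodFormatterBuilder()
    .appendDays().appendSuffix("d")
    .appendHours().appendSuffix("h")
    .appendMinutes().appendSuffix("m")
    .appendSeconds().appendSuffix("s")
    .appendMillis().appendSuffix("ms")
    .toFormatter

  // Renders an elapsed Joda Duration using the d/h/m/s/ms suffixes above.
  def format(elapsed: Duration): String = formatter.print(elapsed.toPeriod)
}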