Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/one sided confidence #10

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ project/plugins/project/
# Scala-IDE specific
.scala_dependencies
.worksheet

.idea
*.iml
2 changes: 1 addition & 1 deletion src/main/scala/io/citrine/theta/BenchmarkRegistry.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ object BenchmarkRegistry {
/**
* Get the event count associated with the benchmark
* @param name of the benchmark
* @return numer of events that were benchmarked
* @return number of events that were benchmarked
*/
def getCount(name: String): Long = {
if (name == "None") return 1L
Expand Down
208 changes: 201 additions & 7 deletions src/main/scala/io/citrine/theta/Stopwatch.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.citrine.theta
import org.apache.commons.math3.distribution.TDistribution

import scala.collection.mutable
import scala.collection.mutable.ListBuffer

object Stopwatch {
/**
Expand All @@ -24,31 +25,34 @@ object Stopwatch {
targetError: Double = 0.05,
confidence: Double = 0.95
): Double = {
val minRunActual = Math.max(minRun, 4)
if (minRun < 4) {
throw new IllegalArgumentException(s"A minimum of 4 runs are required to establish bounds with meaningful confidence. Only $minRun were requested.")
}

val times = mutable.ListBuffer.empty[Double]
var iteration = 0
var iteration: Int = 0
var errorEstimate: Double = Double.MaxValue
var mean: Double = 0.0
while ((errorEstimate > targetError || iteration < minRunActual) && iteration < maxRun) {
while ((errorEstimate > targetError || iteration < minRun) && iteration < maxRun) {
val start = System.nanoTime()
block
val thisTime: Double = System.nanoTime() - start

times.append(thisTime) // add a new time
if (iteration % 4 == 3) times.remove(times.indexOf(times.max)) // wipe out an slow time every 4 iterations
iteration = iteration + 1
iteration += 1

/* Compute mean and variance */
val sumTime = times.sum
mean = sumTime / times.size
val variance = times.map(t => t - mean).map(t => t * t).sum / times.size
val variance: Double = calcVariance(times, Some(mean))

/* Estimate the uncertainty in the mean */
errorEstimate = if (times.size > 1) {
// We have a small number of samples, so the distribution will be a student's T distribution
val dist = new TDistribution(times.size - 1)
// Where on the t-distribution do we achieve the desired level of confidence?
val x = dist.inverseCumulativeProbability(1.0 - (1.0 - confidence)/2.0)
val x = dist.inverseCumulativeProbability(1.0 - (1.0 - confidence) / 2.0)
// Convert from the position in the t-distribution to the uncertainty in the mean,
// relative to the estimate of the mean
// Note that we're dividing the uncertainty in the mean by the estimated, not true, mean
Expand All @@ -58,7 +62,197 @@ object Stopwatch {
Double.MaxValue
}
}
// println(s"FYI: took ${iteration} iterations to converge")
mean * 1.0e-9 // convert to seconds
}

/**
* Test whether a function is slower than the specified time.
*
* @param block to time
* @param time Critical time (in seconds) to test whether the block is slower than
* @param minRun Minimum number of runs to perform testing the runtime
* @param maxRun Maximum number of runs to perform. If this number is reached, a runtime exception is
* @param falsePositive False positive rate. (Also known as the probability of observing a Type I error.)
* @param falseNegative False negative rate. (Also known as the probability of observing a Type II error.)
* thrown because the null hypothesis could not be tested at the specified
* false positive and false negative rates. In which case, either the maximum number
* of runs should be increased or the FP/FN rates should be relaxed.
* @param minimumTimeDifference Minimum time difference (in seconds) required to conclude the runtime is slower
* than the specified time.
* @param acceptableEffectSize If defined, the test will exit if the effect size exceeds this value.
* Large, medium and small effects are given by 0.8, 0.5 and 0.2, respectively.
* Units are defined in terms of standard deviations of the mean. The effect size
* is directional. If positive, the test will exit early if the mean execution time
* is clearly slower than the given `time`. If negative, the test will exit early if
* the execution time is clearly faster than the specified time. Note, however,
* that ''clearly'' in this context is defined by the effect size you specify.
* @tparam R Return type of the execution block
* @return Whether the function block is faster than the specified time.
*/
def isSlowerThan[R](
block: => R, time: Double,
minRun: Int = 4, maxRun: Int = 64,
falsePositive: Double = 0.05, falseNegative: Double = 0.20,
minimumTimeDifference: Double = 0.0,
acceptableEffectSize: Option[Double] = None
): Boolean = {
validateInputs(minRun, maxRun, falsePositive, falseNegative)

val times = mutable.ListBuffer.empty[Double]
var iteration: Int = 0
while (iteration < minRun || iteration < maxRun) {
val start = System.nanoTime()
block
val thisTime: Double = (System.nanoTime() - start) * 1.0e-9 // convert to seconds
times.append(thisTime)
iteration += 1
val sampleSize = times.size
if (sampleSize > 1 && iteration >= minRun) {
val sampleMean = calcHodgesLehmannMedian(times)
val sampleMAD = calcMAD(times)
val sampleStd = convertMADtoStd(sampleMAD)

val tDist = new TDistribution(sampleSize - 1)
val tAlpha = tDist.inverseCumulativeProbability(falsePositive)
val tBeta = tDist.inverseCumulativeProbability(falseNegative)
val delta = sampleMean - time - minimumTimeDifference
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do the signs work out here? I would think it is Math.max(0.0, Math.abs(sampleMean-time) - minimumTimeDifference)

val tRequiredSampleSize = Math.pow(sampleStd * (tAlpha + tBeta) / delta, 2)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This symmetric treatment of tAlpha and tBeta wrt signum(sampleMean - time) strikes me as maybe not right. Do we get the expected behavior when FP is very low, FN is very high, and sampleMean << time or sampleMean >> time ?


// Do we have enough samples to test the hypothesis at the requested FP/FN rates?
if (sampleSize >= tRequiredSampleSize) {
val t = delta / (sampleStd / Math.sqrt(sampleSize))
return t > -tAlpha
}

if (acceptableEffectSize.isDefined) {
val effectSize = calcEffectSize(sampleMean, sampleStd, time)
if (measuredAcceptableEffectSize(effectSize, acceptableEffectSize.get)) {
// At this point the result is not statistically significant (at least not at the FP/FN rates specified).
// However, we've measured a large enough effect to report the findings via direct comparison.
return sampleMean > time
}
}
}
}
false // Results are not statistically significant. Cannot reject null hypothesis that execution time == `time`.
}

/** Checks that the user input sensible parameter values */
private def validateInputs(minRun: Int, maxRun: Int, falsePositive: Double, falseNegative: Double): Unit = {
if (minRun < 2) {
throw new IllegalArgumentException(s"Minimum number of runs = $minRun was requested. At least 2 runs are required.")
}
if (maxRun < 2) {
throw new IllegalArgumentException(s"Maximum number of runs = $maxRun was requested. At least 2 runs are required.")
}
if (maxRun < minRun) {
throw new IllegalArgumentException(s"Maximum number of runs must be greater than or equal to the minimum number of runs. Maximum requested = $maxRun. Minimum requested = $minRun.")
}
if (falsePositive < 0 || falsePositive > 0.5) {
throw new IllegalArgumentException(s"False positive probability (= $falsePositive) must be between 0 and 0.5.")
}
if (falseNegative < 0 || falseNegative > 0.5) {
throw new IllegalArgumentException(s"False negative probability (= $falseNegative) must be between 0 and 0.5.")
}
}

/**
* Checks wither the sign of the effect size matches the acceptable effect size and whether the magnitude of the
* effect is greater than the acceptable limit.
*
* @param effectSize Measure effect size
* @param acceptableEffectSize Minimum acceptable effect size for the test to terminate
* @return Whether an acceptable effect size has been measured.
*/
private def measuredAcceptableEffectSize(effectSize: Double, acceptableEffectSize: Double): Boolean = {
Math.signum(effectSize) == Math.signum(acceptableEffectSize) && Math.abs(effectSize) > Math.abs(acceptableEffectSize)
}

/**
* Calculates the variance of the list, given its mean.
*
* @param list of samples
* @param mean of the list
* @return sample variance
*/
private def calcVariance(list: ListBuffer[Double], mean: Option[Double] = None): Double = {
list.map(Math.pow(_, 2)).sum / list.size - Math.pow(mean.getOrElse(list.sum / list.size), 2)
}

/**
* Calculates the size of the measured effect in units of standard deviations of the mean.
*
* @param sampleMean Mean of the samples taken from a normal distribution
* @param sampleStd Standard deviation of normally distributed samples
* @param hypMean Hypothesized mean
* @return Effect size.
*/
private def calcEffectSize(sampleMean: Double, sampleStd: Double, hypMean: Double): Double = {
(sampleMean - hypMean) / sampleStd
}

/**
* Returns the median of a list.
*
* @param list to obtain the median from
* @return Median value
*/
private def calcMedian(list: Seq[Double]): Double = {
val size = list.size
val sortedTimes = list.sorted
val median = if (size % 2 == 0) {
val rightIndex = size / 2
val leftIndex = rightIndex - 1
(sortedTimes(leftIndex) + sortedTimes(rightIndex)) / 2
} else {
val medianIndex = (size - 1) / 2
sortedTimes(medianIndex)
}
median
}

/**
* Calculates the median absolute deviation (MAD) of the list.
*
* @param list to evaluate
* @return Median absolute deviation
*/
private def calcMAD(list: Seq[Double]): Double = {
val median = calcMedian(list)
val medianDeviations = list.map(x => Math.abs(x - median))
calcMedian(medianDeviations)
}

/**
* Factor to convert median absolute deviation to standard deviation.
* This value can be calculated from `1.0 / new NormalDistribution().inverseCumulativeProbability(0.75)`.
*/
private val kMADtoStd: Double = 1.4826022185056023

/**
* Convert median absolute deviation (MAD) to standard deviation.
*
* @param mad Median absolution deviation
* @return Standard deviation
*/
private def convertMADtoStd(mad: Double): Double = {
kMADtoStd * mad
}

/**
* Calculates the Hodges-Lehmann estimate of the median. For symmetric distributions, this returns a consistent,
* median-unbiased estimate. Commonly, is used as a method to estimate the mean that is robust to outliers.
*
* @param list used to approximate the median
* @return Approximate median
*/
private def calcHodgesLehmannMedian(list: Seq[Double]): Double = {
val size = list.size
val means = (0 until size).flatMap { i =>
(i until size).map { j =>
0.5 * (list(i) + list(j))
}
}
calcMedian(means)
}
}
102 changes: 101 additions & 1 deletion src/test/scala/io/citrine/theta/StopwatchTest.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.citrine.theta

import io.citrine.theta.benchmarks.{RandomGenerationBenchmark, StreamBenchmark}
import io.citrine.theta.benchmarks.{Benchmark, RandomGenerationBenchmark, StreamBenchmark}
import org.junit.Test

import scala.util.Random
Expand Down Expand Up @@ -34,6 +34,106 @@ class StopwatchTest {
assert(theta > 0.8, s"STREAM benchmark inconsistent (too fast ${theta})")
}

/**
* Test that the stopwatch can detect whether a benchmark with a known execution time is slower (or faster) than a specified time.
*/
@Test
def testIsNotSlowerThanAnEqualTime(): Unit = {
val b = new RandomSleepBenchmark()
val equalTime = b.mean / 1000
assert(
!Stopwatch.isSlowerThan(b.kernel(), equalTime, minimumTimeDifference = b.std / 1000), "Unable to determine a benchmark is not slower than the known average execution time."
)
}

/**
* Test that the stopwatch can detect whether a benchmark with a known execution time is slower (or faster) than a specified time.
*/
@Test
def testIsSlowerThanMinimumTimeDifference(): Unit = {
val b = new RandomSleepBenchmark()
val fasterTime = (b.mean - 3 * b.std) / 1000
assert(
!Stopwatch.isSlowerThan(b.kernel(), fasterTime, minimumTimeDifference = 4 * b.std / 1000), "Unable to determine a benchmark is not slower than the known average execution time."
)
assert(
Stopwatch.isSlowerThan(b.kernel(), fasterTime, minimumTimeDifference = 2 * b.std / 1000), "Unable to determine a benchmark is not slower than the known average execution time."
)

// check that minimum time difference does not affect the calculation when the sample mean is less than the hypothesized mean
val slowerTime = (b.mean + 3 * b.std) / 1000
assert(
!Stopwatch.isSlowerThan(b.kernel(), slowerTime, minimumTimeDifference = 10 * b.std / 1000), "Unable to determine a benchmark is not slower than the known average execution time."
)
}

/**
* Test that the stopwatch can detect whether a benchmark with a known execution time is slower (or faster) than a specified time.
*/
@Test
def testIsSlowerThan(): Unit = {
val b = new RandomSleepBenchmark()
val effectSizes = Seq(0.5, 0.8) // test medium and large effect sizes
val minimumTimeDifference = 0.2 * b.std / 1000 // must measure a small effect before rejecting null hypothesis
effectSizes.foreach { effectSize =>
val delta = b.std * effectSize
val fasterTime = (b.mean - delta) / 1000
val slowerTime = (b.mean + delta) / 1000
assert(
Stopwatch.isSlowerThan(b.kernel(), fasterTime, minimumTimeDifference = minimumTimeDifference), s"Unable to determine a benchmark is slower than a known faster time with effect size $effectSize."
)
assert(
!Stopwatch.isSlowerThan(b.kernel(), slowerTime, minimumTimeDifference = minimumTimeDifference), s"Unable to determine a benchmark is faster than a known slower time with effect size $effectSize."
)
}
}

/**
* Test that the stopwatch whether the stopwatch will exit early if an acceptable effect size is measured
*/
@Test
def testIsSlowerThanEffectSizeEarlyExit(): Unit = {
val b = new RandomSleepBenchmark()

// the measured effect size should by +/- 2 (much larger than +/- 0.5 required to terminate the test)
val muchFasterTime = (b.mean - b.std * 2) / 1000
val muchSlowerTime = (b.mean + b.std * 2) / 1000

// set FP/FN rates so low that the required number of samples won't be satisfied before an acceptable effect size is measured
val stringentProbability = 1.0e-6

val minRun = 4
val maxExpectedDuration = b.mean * (minRun + 1)

val start1 = System.nanoTime()
val isSlowerThanFasterTime = Stopwatch.isSlowerThan(b.kernel(), muchFasterTime, minRun = minRun, acceptableEffectSize = Some(0.5), falseNegative = stringentProbability, falsePositive = stringentProbability)
val duration1 = (System.nanoTime() - start1) * 1.0e-9
assert(duration1 < maxExpectedDuration, s"Benchmark did not exit early when effect size is much larger than acceptable limit.")
assert(isSlowerThanFasterTime, "Unable to determine a benchmark is slower than a known faster time after measuring acceptable effect size.")

val start2 = System.nanoTime()
val isSlowerThanSlowerTime = Stopwatch.isSlowerThan(b.kernel(), muchSlowerTime, minRun = minRun, acceptableEffectSize = Some(-0.5), falseNegative = stringentProbability, falsePositive = stringentProbability)
val duration2 = (System.nanoTime() - start2) * 1.0e-9
assert(duration2 < maxExpectedDuration, s"Benchmark did not exit early when effect size is much larger than acceptable limit.")
assert(!isSlowerThanSlowerTime, "Unable to determine a benchmark is faster than a known slower time after measuring acceptable effect size.")
}
}

/**
* Test benchmark that sleeps for a random amount of time sampled from a normal distribution.
*/
class RandomSleepBenchmark() extends Benchmark {
/** Average sleep time (in ms) */
val mean = 100.0 // ms

/** Standard deviation about the mean sleep time (in ms) */
val std = 10.0 // ms

/** Kernel that sleep for a random amount of time */
override def kernel(): Unit = {
val r = mean + Random.nextGaussian() * std
Thread.sleep(r.toLong)
}
}

object StopwatchTest {
Expand Down