Skip to content

Commit

Permalink
Moving auto-embedding for AIS (Ragwitz and maximum bias-corrected AIS…
Browse files Browse the repository at this point in the history
… methods) from out of Gaussian and KSG calculators and into the common AIS calculator via MI class. The common implementation then allows us to introduce auto-embedding for the AIS kernel estimator as well (included requiring adding a property for the number of surrogates to use to compute the bias for this one).

These changes now incorporate auto-embedding for all AIS continuous calculators, partially addressing issue #38
  • Loading branch information
jlizier committed May 16, 2018
1 parent 1c59192 commit e8603b0
Show file tree
Hide file tree
Showing 4 changed files with 337 additions and 608 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import java.util.Iterator;
import java.util.Vector;

import infodynamics.measures.continuous.kraskov.ActiveInfoStorageCalculatorKraskov;
import infodynamics.measures.continuous.kraskov.MutualInfoCalculatorMultiVariateKraskov;
import infodynamics.measures.continuous.kraskov.MutualInfoCalculatorMultiVariateKraskov1;
import infodynamics.utils.EmpiricalMeasurementDistribution;
import infodynamics.utils.MatrixUtils;

Expand All @@ -33,6 +36,8 @@
* <p>Usage is as per the paradigm outlined for {@link ActiveInfoStorageCalculator},
* except that in the constructor(s) for this class the implementation for
* a {@link MutualInfoCalculatorMultiVariate} must be supplied.
* Further properties may be set on this class via {@link #setProperty(String, String)}
* (including auto-embedding parameters) as described in the javadocs for that method.
* </p>
*
* <p>This class <i>may</i> be used directly, however users are advised that
Expand Down Expand Up @@ -90,6 +95,70 @@ public class ActiveInfoStorageCalculatorViaMutualInfo implements
*/
protected Vector<boolean[]> vectorOfValidityOfObservations;

/**
* Property name for the auto-embedding method. Defaults to {@link #AUTO_EMBED_METHOD_NONE}.
* Other valid values are {@link #AUTO_EMBED_METHOD_RAGWITZ} or
* {@link #AUTO_EMBED_METHOD_MAX_CORR_AIS}.
*/
public static final String PROP_AUTO_EMBED_METHOD = "AUTO_EMBED_METHOD";
/**
* Valid value for the property {@link #PROP_AUTO_EMBED_METHOD} indicating that
* no auto-embedding should be done (i.e. the manually supplied parameters are used).
*/
public static final String AUTO_EMBED_METHOD_NONE = "NONE";
/**
* Valid value for the property {@link #PROP_AUTO_EMBED_METHOD} indicating that
* the Ragwitz optimisation technique should be used for automatic embedding.
*/
public static final String AUTO_EMBED_METHOD_RAGWITZ = "RAGWITZ";
/**
* Valid value for the property {@link #PROP_AUTO_EMBED_METHOD} indicating that
* the automatic embedding should be done by maximising the bias-corrected
* AIS (as per Garland et al. in the references above).
*/
public static final String AUTO_EMBED_METHOD_MAX_CORR_AIS = "MAX_CORR_AIS";
/**
* Internal variable tracking what type of auto-embedding (if any)
* we are using. Starts as {@link #AUTO_EMBED_METHOD_NONE}.
*/
protected String autoEmbeddingMethod = AUTO_EMBED_METHOD_NONE;

/**
* Property name for the maximum k (embedding length) for the auto-embedding search.
* Defaults to 1.
*/
public static final String PROP_K_SEARCH_MAX = "AUTO_EMBED_K_SEARCH_MAX";
/**
* Internal variable for storing the maximum embedding length to search up to
* when automatically determining the embedding parameters.
*/
protected int k_search_max = 1;

/**
* Property name for the maximum tau (embedding delay) for the auto-embedding search.
* Defaults to 1.
*/
public static final String PROP_TAU_SEARCH_MAX = "AUTO_EMBED_TAU_SEARCH_MAX";
/**
* Internal variable for storing the maximum embedding delay to search up to
* when automatically determining the embedding parameters.
*/
protected int tau_search_max = 1;

/**
* Property name for the number of nearest neighbours to use for the auto-embedding search (Ragwitz criteria).
* Defaults to match the value in use for {@link MutualInfoCalculatorMultiVariateKraskov#PROP_K}
*/
public static final String PROP_RAGWITZ_NUM_NNS = "AUTO_EMBED_RAGWITZ_NUM_NNS";
/**
* Internal variable for storing the number of nearest neighbours to use for the
* auto-embedding search (Ragwitz criteria).
* The initial value of 1 is only a placeholder: it is not consulted
* until {@link #ragwitz_num_nns_set} is true.
*/
protected int ragwitz_num_nns = 1;
/**
* Internal variable to track whether the property {@link #PROP_RAGWITZ_NUM_NNS} has been
* set yet
*/
protected boolean ragwitz_num_nns_set = false;

/**
* Construct using an instantiation of the named MI calculator
*
Expand Down Expand Up @@ -183,6 +252,27 @@ public void initialise(int k, int tau) throws Exception {
* <ul>
* <li>Those defined for the {@link ActiveInfoStorageCalculator} interface
* (i.e. {@link #K_PROP_NAME} or {@link #TAU_PROP_NAME})</li>
* <li>{@link #PROP_AUTO_EMBED_METHOD} -- method by which the calculator
* automatically determines the embedding history length ({@link #K_PROP_NAME})
* and embedding delay ({@link #TAU_PROP_NAME}). Default is {@link #AUTO_EMBED_METHOD_NONE} meaning
* values are set manually; other accepted values include: {@link #AUTO_EMBED_METHOD_RAGWITZ} for use
* of the Ragwitz criteria and {@link #AUTO_EMBED_METHOD_MAX_CORR_AIS} for using
* the max bias-corrected AIS criteria (both searching up to {@link #PROP_K_SEARCH_MAX} and
* {@link #PROP_TAU_SEARCH_MAX}, as outlined by Garland et al. in the references list above).
* Use of any value other than {@link #AUTO_EMBED_METHOD_NONE}
* means that any previous settings for k and tau (via e.g. {@link #initialise(int, int)} or
* auto-embedding during previous calculations) will be overwritten after observations
* are supplied.</li>
* <li>{@link #PROP_K_SEARCH_MAX} -- maximum embedded history length to search
* up to if automatically determining the embedding parameters (as set by
* {@link #PROP_AUTO_EMBED_METHOD}); default is 1</li>
* <li>{@link #PROP_TAU_SEARCH_MAX} -- maximum embedding delay to search
* up to if automatically determining the embedding parameters (as set by
* {@link #PROP_AUTO_EMBED_METHOD}); default is 1</li>
* <li>{@link #PROP_RAGWITZ_NUM_NNS} -- number of nearest neighbours to use
* in the auto-embedding if the property {@link #PROP_AUTO_EMBED_METHOD}
* has been set to {@link #AUTO_EMBED_METHOD_RAGWITZ}. Defaults to the property value
* set for {@link MutualInfoCalculatorMultiVariateKraskov#PROP_K}</li>
* <li>Any properties defined for the underlying
* {@link MutualInfoCalculatorMultiVariate#setProperty(String, String)} implementation,
* <b>however</b> the user is <b>not</b> allowed to set the property
Expand Down Expand Up @@ -211,6 +301,19 @@ public void setProperty(String propertyName, String propertyValue)
k = Integer.parseInt(propertyValue);
} else if (propertyName.equalsIgnoreCase(TAU_PROP_NAME)) {
tau = Integer.parseInt(propertyValue);
} else if (propertyName.equalsIgnoreCase(PROP_AUTO_EMBED_METHOD)) {
// New method set for determining the embedding parameters
autoEmbeddingMethod = propertyValue;
} else if (propertyName.equalsIgnoreCase(PROP_K_SEARCH_MAX)) {
// Set max embedding history length for auto determination of embedding
k_search_max = Integer.parseInt(propertyValue);
} else if (propertyName.equalsIgnoreCase(PROP_TAU_SEARCH_MAX)) {
// Set maximum embedding delay for auto determination of embedding
tau_search_max = Integer.parseInt(propertyValue);
} else if (propertyName.equalsIgnoreCase(PROP_RAGWITZ_NUM_NNS)) {
// Set the number of nearest neighbours to use in case of Ragwitz auto embedding:
ragwitz_num_nns = Integer.parseInt(propertyValue);
ragwitz_num_nns_set = true;
} else {
// No property was set on this class, assume it is for the underlying
// MI calculator
Expand All @@ -231,6 +334,18 @@ public String getProperty(String propertyName)
return Integer.toString(k);
} else if (propertyName.equalsIgnoreCase(TAU_PROP_NAME)) {
return Integer.toString(tau);
} else if (propertyName.equalsIgnoreCase(PROP_AUTO_EMBED_METHOD)) {
return autoEmbeddingMethod;
} else if (propertyName.equalsIgnoreCase(PROP_K_SEARCH_MAX)) {
return Integer.toString(k_search_max);
} else if (propertyName.equalsIgnoreCase(PROP_TAU_SEARCH_MAX)) {
return Integer.toString(tau_search_max);
} else if (propertyName.equalsIgnoreCase(PROP_RAGWITZ_NUM_NNS)) {
if (ragwitz_num_nns_set) {
return Integer.toString(ragwitz_num_nns);
} else {
return miCalc.getProperty(MutualInfoCalculatorMultiVariateKraskov.PROP_K);
}
} else {
// No property was set on this class, assume it is for the underlying
// MI calculator, even if it is for
Expand Down Expand Up @@ -350,14 +465,143 @@ public void addObservations(double[] observations, int startTime,
* observation time series prior to their being processed and supplied
* to the underlying MI calculator.
* Primarily this is to allow the child implementation to automatically determine
* embedding parameters if desired.
* embedding parameters if desired, and a default implementation is provided
* for this for the main two auto-embedding methods.
* Child implementations do not need to override this default implementation
* if no new functionality is required.
*/
protected void preFinaliseAddObservations() throws Exception {
	// Nothing to do unless the user asked for the embedding parameters
	// to be determined automatically.
	if (autoEmbeddingMethod.equalsIgnoreCase(AUTO_EMBED_METHOD_NONE)) {
		return;
	}

	// Work out which search criterion was requested, rejecting anything unknown
	// before any candidate is evaluated.
	final boolean useRagwitz =
			autoEmbeddingMethod.equalsIgnoreCase(AUTO_EMBED_METHOD_RAGWITZ);
	final boolean useMaxCorrAis =
			autoEmbeddingMethod.equalsIgnoreCase(AUTO_EMBED_METHOD_MAX_CORR_AIS);
	if (!useRagwitz && !useMaxCorrAis) {
		throw new RuntimeException("Unexpected value " + autoEmbeddingMethod +
				" for property " + PROP_AUTO_EMBED_METHOD);
	}

	// k=1, tau=1 is the fallback if no candidate improves on the initial score.
	// TODO Could make the rest of the code handle k=0 as the fallback instead,
	//  though k=1 is the preferred default for now.
	int bestK = 1;
	int bestTau = 1;

	if (useRagwitz) {
		// Ragwitz criterion: minimise the normalised prediction error
		double lowestError = Double.POSITIVE_INFINITY;
		if (debug) {
			System.out.printf("Beginning Ragwitz auto-embedding with k_max=%d, tau_max=%d\n",
					k_search_max, tau_search_max);
		}

		for (int kTrial = 1; kTrial <= k_search_max; kTrial++) {
			// tau is irrelevant when k == 1, so at most one tau value is tried there
			final int tauLimit = (kTrial == 1) ? Math.min(1, tau_search_max) : tau_search_max;
			for (int tauTrial = 1; tauTrial <= tauLimit; tauTrial++) {
				try {
					// A KSG MI calculator is used since it can evaluate the Ragwitz
					// prediction errors. If we are the KSG AIS calculator then reuse the
					// internal MI calculator (it may carry properties set by the user);
					// otherwise create a fresh one with default properties, where only
					// the number of nearest neighbours is user-adjustable.
					final MutualInfoCalculatorMultiVariateKraskov ksgCalc =
							(this instanceof ActiveInfoStorageCalculatorKraskov)
									? (MutualInfoCalculatorMultiVariateKraskov) miCalc
									: new MutualInfoCalculatorMultiVariateKraskov1();
					prepareMICalculator(ksgCalc, kTrial, tauTrial);

					// Prediction errors of the next value from the nearest neighbours
					// of the previous state (only the first term of the array is used)
					final double[] errors = ragwitz_num_nns_set
							? ksgCalc.computePredictionErrorsFromObservations(false, ragwitz_num_nns)
							: ksgCalc.computePredictionErrorsFromObservations(false);

					// Normalise by the number of observations, since larger k and tau
					// leave fewer observations available
					final double normalisedError =
							errors[0] / (double) ksgCalc.getNumObservations();
					if (debug) {
						System.out.printf("Embedding prediction error (dim=%d) for k=%d,tau=%d is %.3f\n",
								errors.length, kTrial, tauTrial, normalisedError);
					}
					if (normalisedError < lowestError) {
						// Best candidate so far
						lowestError = normalisedError;
						bestK = kTrial;
						bestTau = tauTrial;
					}
				} catch (Exception ex) {
					throw new Exception("Exception encountered in attempting auto-embedding, evaluating candidates k=" + kTrial +
							", tau=" + tauTrial, ex);
				}
			}
		}
	} else {
		// Max bias-corrected AIS criterion: maximise the corrected AIS estimate
		double highestAis = Double.NEGATIVE_INFINITY;
		if (debug) {
			System.out.printf("Beginning max bias corrected AIS auto-embedding with k_max=%d, tau_max=%d\n",
					k_search_max, tau_search_max);
		}

		for (int kTrial = 1; kTrial <= k_search_max; kTrial++) {
			// tau is irrelevant when k == 1, so at most one tau value is tried there
			final int tauLimit = (kTrial == 1) ? Math.min(1, tau_search_max) : tau_search_max;
			for (int tauTrial = 1; tauTrial <= tauLimit; tauTrial++) {
				try {
					// Reuse the internal MI calculator so that any properties
					// already set on it are honoured
					prepareMICalculator(miCalc, kTrial, tauTrial);
					// AIS estimate for this candidate, less any additional bias
					final double correctedAis =
							miCalc.computeAverageLocalOfObservations() - computeAdditionalBiasToRemove();
					if (debug) {
						System.out.printf("AIS (bias corrected) for k=%d,tau=%d (%d samples) is %.5f\n",
								kTrial, tauTrial, miCalc.getNumObservations(), correctedAis);
					}
					if (correctedAis > highestAis) {
						// Best candidate so far
						highestAis = correctedAis;
						bestK = kTrial;
						bestTau = tauTrial;
					}
				} catch (Exception ex) {
					throw new Exception("Exception encountered in attempting auto-embedding, evaluating candidates k=" + kTrial +
							", tau=" + tauTrial, ex);
				}
			}
		}
	}

	// Commit the selected embedding length and delay
	k = bestK;
	tau = bestTau;
	if (debug) {
		System.out.printf("Embedding parameters set to k=%d,tau=%d\n",
				k, tau);
	}
}

/**
* Internal hook supplying any additional bias to subtract from the AIS estimate
* of the underlying calculator while auto-embedding in
* {@link #preFinaliseAddObservations()}.
* This default implementation applies no additional correction.
*
* @return additional bias correction to remove (zero here, i.e. the estimate
*  is assumed to be already bias corrected).
* @throws Exception not thrown by this default implementation
*/
protected double computeAdditionalBiasToRemove() throws Exception {
	// No additional correction by default
	return 0.0;
}

/* (non-Javadoc)
* @see infodynamics.measures.continuous.ActiveInfoStorageCalculator#finaliseAddObservations()
*/
Expand Down
Loading

0 comments on commit e8603b0

Please sign in to comment.