From 94053b8c3e69740558fc5786f37ee44ba6f9a918 Mon Sep 17 00:00:00 2001 From: Josipmrden Date: Sat, 26 Sep 2020 10:17:50 +0200 Subject: [PATCH] Finishing touches, added GEP. --- CMakeLists.txt | 23 +- direct_methods/MultipleTreeEvaluator.cpp | 192 ++--- direct_methods/MultipleTreeEvaluator.h | 5 +- executables/BenchmarkTesting.cpp | 119 ++- gep/AlgGEP.cpp | 465 ++++++++++++ gep/AlgGEP.h | 65 ++ gep/GEPChromosome.cpp | 700 ++++++++++++++++++ gep/GEPChromosome.h | 70 ++ gep/GEPChromosomeCrsGene.cpp | 53 ++ gep/GEPChromosomeCrsGene.h | 21 + gep/GEPChromosomeCrsOnePoint.cpp | 44 ++ gep/GEPChromosomeCrsOnePoint.h | 21 + gep/GEPChromosomeCrsTwoPoint.cpp | 64 ++ gep/GEPChromosomeCrsTwoPoint.h | 21 + gep/GEPChromosomeMutGauss.cpp | 54 ++ gep/GEPChromosomeMutGauss.h | 24 + gep/GEPChromosomeMutOp.cpp | 32 + gep/GEPChromosomeMutOp.h | 19 + implicit_functions/SymbolicRegressionUtil.cpp | 1 + implicit_functions/SymbolicRegressionUtil.h | 2 + .../cross_validation/InstanceRunner.cpp | 43 +- .../cross_validation/InstanceRunner.h | 3 + .../cross_validation/ParameterSet.cpp | 3 +- .../cross_validation/ParameterSet.h | 4 +- .../cross_validation/TweakableParameters.cpp | 10 +- .../cross_validation/TweakableParameters.h | 3 +- .../unordered/GepEvaluation.cpp | 205 +++++ implicit_functions/unordered/GepEvaluation.h | 41 + .../UnorderedMultiDimImplicitEvaluation.cpp | 2 +- 29 files changed, 2155 insertions(+), 154 deletions(-) create mode 100644 gep/AlgGEP.cpp create mode 100644 gep/AlgGEP.h create mode 100644 gep/GEPChromosome.cpp create mode 100644 gep/GEPChromosome.h create mode 100644 gep/GEPChromosomeCrsGene.cpp create mode 100644 gep/GEPChromosomeCrsGene.h create mode 100644 gep/GEPChromosomeCrsOnePoint.cpp create mode 100644 gep/GEPChromosomeCrsOnePoint.h create mode 100644 gep/GEPChromosomeCrsTwoPoint.cpp create mode 100644 gep/GEPChromosomeCrsTwoPoint.h create mode 100644 gep/GEPChromosomeMutGauss.cpp create mode 100644 gep/GEPChromosomeMutGauss.h create mode 100644 
gep/GEPChromosomeMutOp.cpp create mode 100644 gep/GEPChromosomeMutOp.h create mode 100644 implicit_functions/unordered/GepEvaluation.cpp create mode 100644 implicit_functions/unordered/GepEvaluation.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9257b42..c129dd0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,8 +46,29 @@ set(SOURCE_FILES implicit_functions/cross_validation/CrossValUtil.h implicit_functions/cross_validation/ParameterSet.cpp implicit_functions/cross_validation/ParameterSet.h + utils/SymbRegLibrary.h + implicit_functions/cross_validation/DatasetInfo.cpp + implicit_functions/cross_validation/DatasetInfo.h + gep/AlgGEP.cpp + gep/AlgGEP.h + gep/GEPChromosome.cpp + gep/GEPChromosome.h + gep/GEPChromosomeCrsOnePoint.h + gep/GEPChromosomeCrsOnePoint.cpp + gep/GEPChromosomeCrsTwoPoint.cpp + gep/GEPChromosomeCrsTwoPoint.h + gep/GEPChromosomeMutGauss.cpp + gep/GEPChromosomeMutGauss.h + gep/GEPChromosomeMutOp.h + gep/GEPChromosomeMutOp.cpp + gep/GEPChromosomeCrsGene.h + gep/GEPChromosomeCrsGene.cpp executables/BenchmarkTesting.cpp - utils/SymbRegLibrary.h implicit_functions/cross_validation/DatasetInfo.cpp implicit_functions/cross_validation/DatasetInfo.h) + implicit_functions/unordered/GepEvaluation.cpp + implicit_functions/unordered/GepEvaluation.h + ap/APEvalOp.cpp + ap/APEvalOp.h + ) find_package(Boost) if(Boost_FOUND) diff --git a/direct_methods/MultipleTreeEvaluator.cpp b/direct_methods/MultipleTreeEvaluator.cpp index c150ee5..141724e 100644 --- a/direct_methods/MultipleTreeEvaluator.cpp +++ b/direct_methods/MultipleTreeEvaluator.cpp @@ -56,24 +56,6 @@ AbstractEvaluateOp *MultipleTreeEvaluator::createNew() { bool MultipleTreeEvaluator::containsAllVariables(IndividualP individual) { - //"* 4 4" -> circle - //" + * Y Y * X X" -> circle - - //"+ * Y Y * X X" -> sphere - //"- * 5 5 * Z Z" -> sphere - - //"+ * * X X X X" -> hyperbola - //"+ * Y Y 1.5" -> hyperbola - - //"/ * - X 1 - X 1 * 3 3" -> ellipse - //"- / * - Y 2 - Y 2 * 4 4 1" -> ellipse 
- - //"- Z * 0.1 Y" -> harmonic oscillator - //"* 3 X" -> harmonic oscillator - - //"- Z * 0.1 Y" -> nonlinear harmonic oscillator - //"* 9.8 sin X" -> nonlinear harmonic oscillator - Tree::Tree* first = (Tree::Tree*) individual->getGenotype(0).get(); Tree::Tree* second = (Tree::Tree*) individual->getGenotype(1).get(); @@ -119,7 +101,7 @@ void MultipleTreeEvaluator::initializeVariables(IndividualP individual) } } -bool MultipleTreeEvaluator::isLowStdevOnRandomValues(IndividualP individual) +bool MultipleTreeEvaluator::isLowStdevOnRandomValues(IndividualP individual, string op) { vector randomPoints; for (int i = 0; i < 10; i++) @@ -135,7 +117,7 @@ bool MultipleTreeEvaluator::isLowStdevOnRandomValues(IndividualP individual) } - double stdev = getFitnessFromPoints(individual, randomPoints); + double stdev = getFitnessFromPoints(individual, randomPoints, op); return stdev < 10E-2; @@ -143,104 +125,117 @@ bool MultipleTreeEvaluator::isLowStdevOnRandomValues(IndividualP individual) double MultipleTreeEvaluator::getFitnessFromPoints(IndividualP individual, vector points) { - vector additionResults; - vector subtractionResults; - vector multiplicationResults; - vector divisionResults; + Tree::Tree* first = getTreeAtIndex(individual, "", 0); + Tree::Tree* second = getTreeAtIndex(individual, "", 1); + + map> results; + for (string op : _operations) + { + vector emptyResults; + results[op] = emptyResults; + } for (int i = 0; i < points.size(); i++) { Point p = points[i]; - double result1 = getResult(individual, p, "+"); - double result2 = getResult(individual, p, "-"); - double result3 = getResult(individual, p, "*"); - double result4 = getResult(individual, p, "/"); - - additionResults.push_back(result1); - subtractionResults.push_back(result2); - multiplicationResults.push_back(result3); - divisionResults.push_back(result4); - } - double stdev1 = getStdev(additionResults); - double stdev2 = getStdev(subtractionResults); - double stdev3 = getStdev(multiplicationResults); - 
double stdev4 = getStdev(divisionResults); + for (int j = 0; j < this->_variables.size(); j++) + { + double value = p.coordinates[j]; + first->setTerminalValue(this->_variables[j], &value); + second->setTerminalValue(this->_variables[j], &value); + } + double firstResult; + double secondResult; + - double minimalStdev = stdev1; - if (stdev2 < minimalStdev) - { - minimalStdev = stdev2; - } - if (stdev3 < minimalStdev) - { - minimalStdev = stdev3; + first->execute(&firstResult); + second->execute(&secondResult); + + for (string op : _operations) + { + if (op == "+") + { + results[op].push_back(firstResult + secondResult); + } + else if (op == "-") + { + results[op].push_back(firstResult - secondResult); + } + else if (op == "*") + { + results[op].push_back(firstResult * secondResult); + } + else if (op == "/") + { + results[op].push_back(firstResult / secondResult); + } + } } - if (stdev4 < minimalStdev) + + double stdev = getStdev(results[_operations[0]]); + + for (string op : _operations) { - minimalStdev = stdev4; + double potentialStdev = getStdev(results[op]); + if (potentialStdev < stdev) + { + stdev = potentialStdev; + } } - return minimalStdev; + return stdev; } - -double MultipleTreeEvaluator::getResult(IndividualP individual, Point p, string op) +double MultipleTreeEvaluator::getFitnessFromPoints(IndividualP individual, vector points, string op) { - //"* 4 4" -> circle - //" + * Y Y * X X" -> circle - - //"+ * Y Y * X X" -> sphere - //"- * 5 5 * Z Z" -> sphere + Tree::Tree* first = getTreeAtIndex(individual, "", 0); + Tree::Tree* second = getTreeAtIndex(individual, "", 1); - //"+ * * X X X X" -> hyperbola - //"+ * Y Y 1.5" -> hyperbola + vector results; - //"/ * - X 1 - X 1 * 3 3" -> ellipse - //"- / * - Y 2 - Y 2 * 4 4 1" -> ellipse + for (int i = 0; i < points.size(); i++) + { + Point p = points[i]; - //"- Z * 0.1 Y" -> harmonic oscillator - //"* 3 X" -> harmonic oscillator + for (int j = 0; j < this->_variables.size(); j++) + { + double value = 
p.coordinates[j]; + first->setTerminalValue(this->_variables[j], &value); + second->setTerminalValue(this->_variables[j], &value); + } + double firstResult; + double secondResult; - //"- Z * 0.1 Y" -> nonlinear harmonic oscillator - //"* 9.8 sin X" -> nonlinear harmonic oscillator - Tree::Tree* first = getTreeAtIndex(individual, "", 0); - Tree::Tree* second = getTreeAtIndex(individual, "", 1); + first->execute(&firstResult); + second->execute(&secondResult); - for (int j = 0; j < this->_variables.size(); j++) - { - double value = p.coordinates[j]; - first->setTerminalValue(this->_variables[j], &value); - second->setTerminalValue(this->_variables[j], &value); + if (op == "+") + { + results.push_back(firstResult + secondResult); + } + else if (op == "-") + { + results.push_back(firstResult - secondResult); + } + else if (op == "*") + { + results.push_back(firstResult * secondResult); + } + else if (op == "/") + { + results.push_back(firstResult / secondResult); + } } - double firstResult; - double secondResult; - first->execute(&firstResult); - second->execute(&secondResult); + double stdev = getStdev(results); - if (op == "+") - { - return firstResult + secondResult; - } - else if (op == "-") - { - return firstResult - secondResult; - } - else if (op == "*") - { - return firstResult * secondResult; - } - else if (op == "/") - { - return firstResult / secondResult; - } - else return 0; + return stdev; } -//figure out which is the best FitnessP MultipleTreeEvaluator::evaluate(IndividualP individual) { + this->_operations = {"+", "-" ,"*", "/"}; Tree::Tree* first = getTreeAtIndex(individual, "", 0); Tree::Tree* second = getTreeAtIndex(individual, "", 1); vector allTrees = { first, second }; @@ -261,12 +256,21 @@ FitnessP MultipleTreeEvaluator::evaluate(IndividualP individual) return fitness; } - if (isLowStdevOnRandomValues(individual)) + vector leftoverOperators; + for (string op : _operations) { + if (!isLowStdevOnRandomValues(individual, op)) { + 
leftoverOperators.push_back(op); + } + } + + if (leftoverOperators.empty()) { fitness->setValue(domainSize * domainSize * domainSize); return fitness; } + _operations = leftoverOperators; + double minimalStdev = getFitnessFromPoints(individual, this->_points); if (isnan(minimalStdev) || fabs(minimalStdev) > punishment) { diff --git a/direct_methods/MultipleTreeEvaluator.h b/direct_methods/MultipleTreeEvaluator.h index f84c635..9f5da95 100644 --- a/direct_methods/MultipleTreeEvaluator.h +++ b/direct_methods/MultipleTreeEvaluator.h @@ -20,6 +20,7 @@ class MultipleTreeEvaluator : public AbstractEvaluateOp vector _points; vector _variables; bool _initializedVariables; + vector _operations; StateP _state; public: bool initialize(StateP state) override; @@ -30,10 +31,10 @@ class MultipleTreeEvaluator : public AbstractEvaluateOp MultipleTreeEvaluator(StateP state, string datasetFileName, ParetoFrontier* paretoFrontier); bool containsAllVariables(IndividualP individual); - bool isLowStdevOnRandomValues(IndividualP individual); + bool isLowStdevOnRandomValues(IndividualP individual, string op); void initializeVariables(IndividualP individual); - double getResult(IndividualP individual, Point p, string op); double getFitnessFromPoints(IndividualP individual, vector points); + double getFitnessFromPoints(IndividualP individual, vector points, string op); string getName() override { diff --git a/executables/BenchmarkTesting.cpp b/executables/BenchmarkTesting.cpp index d814f93..115359a 100644 --- a/executables/BenchmarkTesting.cpp +++ b/executables/BenchmarkTesting.cpp @@ -6,6 +6,10 @@ #include #include #include +#include +#include +#include "../gep/GEPChromosome.h" +#include "../gep/AlgGEP.h" using namespace std; @@ -14,72 +18,121 @@ TweakableParameters createTweakableParams() return TweakableParameters(); } -int main(int argc, char **argv) -{ +int main(int argc, char **argv) { string logDirectory = argv[2]; string evaluatorString = argv[3]; + string population = argv[4]; + 
string dataset = argv[5]; - StateP state (new State); + StateP state(new State); + state->addAlgorithm((GEPP)new GEP()); + GEPChromosome::GEPChromosome *chromosome = new GEPChromosome::GEPChromosome(); + state->addGenotype((GEPChromosomeP) chromosome); - auto* mte = new MultipleTreeEvaluator(); - auto* sse = new SimpleStdevEvaluator(); - auto* umdie = new UnorderedMultiDimImplicitEvaluation(); - auto* umdmge = new UnorderedMultiDimMultiGenEval(); + auto *mte = new MultipleTreeEvaluator(); + auto *sse = new SimpleStdevEvaluator(); + auto *umdie = new UnorderedMultiDimImplicitEvaluation(); + auto *umdmge = new UnorderedMultiDimMultiGenEval(); + auto* gep = new GepEvaluation(); + auto* ap = new APEvalOp(); - vector evaluators = {sse, mte, umdie, umdmge}; - AbstractEvaluateOp* evaluator; + vector evaluators = {sse, mte, umdie, umdmge, gep}; + AbstractEvaluateOp *evaluator; - int noRuns = 20; - string functionset = "+ - * / sin"; + int noRuns = 10; + string functionset = "+ - * /"; + string gaussFunctionSet = "+ - * / sin exp sqrt ngt"; - if (evaluatorString == "sse") - { + if (evaluatorString == "sse") { evaluator = sse; - } - else if (evaluatorString == "mte") - { + } else if (evaluatorString == "mte") { evaluator = mte; - } - else if (evaluatorString == "umdie") - { + } else if (evaluatorString == "umdie") { evaluator = umdie; - } - else if (evaluatorString == "umdmge") - { + } else if (evaluatorString == "umdmge") { evaluator = umdmge; + } else if (evaluatorString == "gep") { + evaluator = gep; + } else if (evaluatorString == "ap") + { + evaluator = ap; } vector datasets; string pointsDatasetDir = "./chosen_points_datasets/"; string planeDatasetDir = "./chosen_plane_datasets/"; string datasetType = pointsDatasetDir; - argv[1] = "parameters.txt"; + //argv[1] = "parameters.txt"; - if (evaluator->requiresPlanes) - { + if (evaluator->requiresPlanes) { datasetType = planeDatasetDir; } - if (evaluator->noTrees > 1) - { + if (evaluator->noTrees > 1) { argv[1] = 
"multipleGenotypeParameters.txt"; } - datasets.push_back(DatasetInfo("CIRCLE_4_0_0_100_150", datasetType + "circle_4_0_0_100_150", "X Y 4", functionset)); - datasets.push_back(DatasetInfo("SPHERE_5_0_0_250_300", datasetType + "sphere_5_0_0_250_300", "X Y Z 5", functionset)); - datasets.push_back(DatasetInfo("ELLIPSE_1_2_3_4_150_200", datasetType + "ellipse_1_2_3_4_150_200", "X Y 1 2 3 4", functionset)); - datasets.push_back(DatasetInfo("HYPERBOLA_1.5_100_150", datasetType + "hyperbola_1.5_100_150", "X Y 1.5", functionset)); - datasets.push_back(DatasetInfo("HARMOSC_50_100", datasetType + "harmosc_50_100", "X Y Z 0.1 3", functionset)); - datasets.push_back(DatasetInfo("NONLINHARMOSC_33_38", datasetType + "nlhmo_33_38", "X Y Z 0.1 9.8", functionset)); + if (dataset == "0") { + datasets.push_back( + DatasetInfo("CIRCLE_4_0_0_100_150", datasetType + "circle_4_0_0_100_150", "X Y 4", functionset)); + } else if (dataset == "1") { + datasets.push_back( + DatasetInfo("SPHERE_5_0_0_250_300", datasetType + "sphere_5_0_0_250_300", "X Y Z 5", functionset)); + } else if (dataset == "2") { + datasets.push_back( + DatasetInfo("ELLIPSE_1_2_3_4_150_200", datasetType + "ellipse_1_2_3_4_150_200", "X Y 1 2 3 4", + functionset)); + } else if (dataset == "3") { + datasets.push_back( + DatasetInfo("HYPERBOLA_1.5_100_150", datasetType + "hyperbola_1.5_100_150", "X Y 1.5", functionset)); + } else if (dataset == "4") { + datasets.push_back(DatasetInfo("HARMOSC_50_100", datasetType + "harmosc_50_100", "X Y Z 0.1 3", functionset)); + } else if (dataset == "5") { + datasets.push_back( + DatasetInfo("NONLINHARMOSC_33_38", datasetType + "nlhmo_33_38", "X Y Z 0.1 9.8", functionset)); + }else if (dataset == "6") { + datasets.push_back( + DatasetInfo("TESTCIRCLE_100_150", datasetType + "moved_circle_6_1_2_100_150", "X Y 6 1 2", functionset)); + }else if (dataset == "7") { + datasets.push_back( + DatasetInfo("TESTELLIPSE_100_150", datasetType + "moved_ellipse_2.5_5_3_4.5_100_150", "X Y 2.5 5 3 4.5 
1", functionset)); + } + else if (dataset == "8") + { + datasets.push_back( + DatasetInfo("TESTGAUSS_200_220", datasetType + "gauss_200_220", "X Y 2 3.14159", gaussFunctionSet)); + } else if (dataset == "9") + { + datasets.push_back(DatasetInfo("TEST_CHALLENGE", datasetType + "I.18.4", "M1 M2 R1 R2 R", functionset)); + } TweakableParameters tp = createTweakableParams(); + if (population == "100") + { + tp.populationSizes = {100}; + } + else if (population == "500") + { + tp.populationSizes = {500}; + } + else if (population == "200") + { + tp.populationSizes = {200}; + } + + if (evaluatorString == "mte") + { + tp.mutationProbablities = {0.2}; + } + auto *instanceRunner = new InstanceRunner(noRuns, datasets, evaluator, tp, argc, argv, state, logDirectory); while (tp.hasNext(TweakableType::Population)) { ParameterSet ps = tp.getNext(TweakableType::Population); instanceRunner->runInstance(ps); + break; } return 0; diff --git a/gep/AlgGEP.cpp b/gep/AlgGEP.cpp new file mode 100644 index 0000000..1afd6e5 --- /dev/null +++ b/gep/AlgGEP.cpp @@ -0,0 +1,465 @@ +#include "AlgGEP.h" + +GEP::GEP(){ + name_ = "GEP"; + selFitPropOp = (SelFitnessProportionalOpP)(new SelFitnessProportionalOp); + selBestOp = (SelBestOpP)(new SelBestOp); + selRandomOp = (SelRandomOpP)(new SelRandomOp); +} + +void GEP::registerParameters(StateP state){ + registerParameter(state, "crxprob", (voidP) new double(0.5), ECF::DOUBLE, "crossover rate"); + registerParameter(state, "selpressure", (voidP) new double(10), ECF::DOUBLE, + "selection pressure: how much is the best individual 'better' than the worst"); + registerParameter(state, "inv.prob", (voidP) new double(0.1), ECF::DOUBLE, "inversion rate"); + registerParameter(state, "erc.inv.prob", (voidP) new double(0.1), ECF::DOUBLE, "ERC inversion rate"); + registerParameter(state, "trans.is.prob", (voidP) new double(0.1), ECF::DOUBLE, "IS transposition rate"); + registerParameter(state, "trans.ris.prob", (voidP) new double(0.1), ECF::DOUBLE, "RIS 
transposition rate"); + registerParameter(state, "trans.gene.prob", (voidP) new double(0.1), ECF::DOUBLE, "gene transposition rate"); + registerParameter(state, "trans.erc.prob", (voidP) new double(0.1), ECF::DOUBLE, "ERC transposition rate"); + registerParameter(state, "trans.max.length", (voidP) new double(3), ECF::DOUBLE, "maximum length of the transposition sequence"); + registerParameter(state, "inv.max.length", (voidP) new double(3), ECF::DOUBLE, "maximum length of the inversion sequence"); +} + +bool GEP::initialize(StateP state){ + selFitPropOp->initialize(state); + selFitPropOp->setSelPressure(10); + selBestOp->initialize(state); + selRandomOp->initialize(state); + + voidP crRateP = getParameterValue(state, "crxprob"); + crxRate_ = *((double*)crRateP.get()); + + voidP selPressP = getParameterValue(state, "selpressure"); + selPressure_ = *((double*)selPressP.get()); + selFitPropOp->setSelPressure(selPressure_); + + voidP invRateP = getParameterValue(state, "inv.prob"); + invRate_ = *((double*)invRateP.get()); + + invRateP = getParameterValue(state, "erc.inv.prob"); + invDcRate_ = *((double*)invRateP.get()); + + invRateP = getParameterValue(state, "inv.max.length"); + invMaxLength_ = *((double*)invRateP.get()); + + voidP transRateP = getParameterValue(state, "trans.is.prob"); + transISRate_ = *((double*)transRateP.get()); + + transRateP = getParameterValue(state, "trans.ris.prob"); + transRISRate_ = *((double*)transRateP.get()); + + transRateP = getParameterValue(state, "trans.gene.prob"); + transGeneRate_ = *((double*)transRateP.get()); + + transRateP = getParameterValue(state, "trans.erc.prob"); + transDcRate_ = *((double*)transRateP.get()); + + transRateP = getParameterValue(state, "trans.max.length"); + transMaxLength_ = *((double*)transRateP.get()); + + GEPChromosome::GEPChromosome* gen(new GEPChromosome::GEPChromosome); + if (state->getGenotypes()[0]->getName() != gen->getName()){ + ECF_LOG_ERROR(state, "Error: this algorithm accepts only a single 
GEPChromosome genotype."); + throw(""); + } + + return true; +} + +bool GEP::advanceGeneration(StateP state, DemeP deme){ + // elitism: copy current best individual + IndividualP best = selBestOp->select(*deme); + best = copy(best); + + // select individuals + std::vector wheel; + wheel = selFitPropOp->selectMany(*deme, (uint)deme->size()); + + // copy selected to new population + for (uint i = 0; i < wheel.size(); ++i) + wheel[i] = copy(wheel[i]); + + // replace old population + for (uint i = 0; i < deme->size(); i++) + replaceWith((*deme)[i], wheel[i]); + + ECF_LOG(state, 5, "Selected individuals:"); + for (uint i = 0; i < deme->size(); i++){ + ECF_LOG(state, 5, dbl2str(deme->at(i)->fitness->getValue())); + } + + // determine the number of crx operations + uint noCrx = (int)(deme->size() * crxRate_ / 2); + + // perform crossover + for (uint i = 0; i < noCrx; i++){ + + // select parents + IndividualP parent1 = selRandomOp->select(*deme); + IndividualP parent2 = selRandomOp->select(*deme); + ECF_LOG(state, 5, "Parents: " + dbl2str(parent1->fitness->getValue()) + ", " + dbl2str(parent2->fitness->getValue())); + + // create children + IndividualP child1 = copy(parent1); + IndividualP child2 = copy(parent2); + + // perform crx operations + mate(parent1, parent2, child1); + mate(parent1, parent2, child2); + + // replace parents with children + replaceWith(parent1, child1); + replaceWith(parent2, child2); + } + + // perform mutation on whole population + mutate(*deme); + + // perform inversion on population + invert(state, *deme); + invertDc(state, *deme); + // perform transposition on population + transpose(state, *deme); + // evaluate new individuals + for (uint i = 0; i < deme->size(); i++) + if (!deme->at(i)->fitness->isValid()) { + evaluate(deme->at(i)); + } + + // elitism: preserve best individual + IndividualP random = selRandomOp->select(*deme); + if (best->fitness->isBetterThan(random->fitness)) + replaceWith(random, best); + + return true; +} + +void 
GEP::invert(StateP state, const std::vector& pool){ + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= invRate_) { + // invert the i-th individual + ECF_LOG(state, 5, "Inverting individual: \n" + pool.at(i)->toString()); + // first, choose a random gene in the individual + GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + pool.at(i)->fitness->setInvalid(); + uint invGene = state_->getRandomizer()->getRandomInteger(individual->genes); + uint geneOffset = invGene * (individual->geneLength); + + // next, choose two random points in the head area + uint bitInv = state_->getRandomizer()->getRandomInteger(individual->headLength); + uint bitInvSecond, tmp; + do + { + bitInvSecond = state_->getRandomizer()->getRandomInteger(individual->headLength); + } while (bitInv == bitInvSecond); + + if (bitInv>bitInvSecond) + { + tmp = bitInv; + bitInv = bitInvSecond; + bitInvSecond = tmp; + } + // shorten the sequence to the specified maximum length + if (bitInvSecond - bitInv >= invMaxLength_){ + bitInvSecond = bitInv + (uint) invMaxLength_ - 1; + } + + ECF_LOG(state, 5, "Inverting the sequence in gene ["+uint2str(invGene)+"] from point (" + uint2str(bitInv) + ") to point (" + uint2str(bitInvSecond) + ")"); + // copy the selected sequence in reverse order + std::vector seq; + for (int j = (geneOffset + bitInvSecond); j >= (int) (geneOffset + bitInv); j--){ + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence with its inversion + uint seqIdx = 0; + for (uint j = (geneOffset + bitInv); j <= (geneOffset + bitInvSecond); j++){ + individual->at(j) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + ECF_LOG(state, 5, "Inverted individual: \n" + pool[i]->toString()); + } + + } +} + +void GEP::invertDc(StateP state, const std::vector& pool){ + // Do a test and check whether the Dc domain actually exists + GEPChromosome::GEPChromosome* 
test = (boost::static_pointer_cast (pool.at(0)->getGenotype())).get(); + if (test->dcLength < 1){ + ECF_LOG(state, 5, "ERCs not used in the cromosome. Dc inversion aborted."); + return; // Do nothing if ERCs are not used + } + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= invDcRate_) { + // invert the i-th individual + pool.at(i)->fitness->setInvalid(); + ECF_LOG(state, 5, "Inverting ERCs of individual: \n" + pool.at(i)->toString()); + // first, choose a random gene in the individual + GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + uint invGene = state_->getRandomizer()->getRandomInteger(individual->genes); + uint geneOffset = invGene * (individual->geneLength); + uint dcStart = geneOffset + individual->headLength + individual->tailLength; + + // next, choose two random points in the Dc tail area + uint bitInv = state_->getRandomizer()->getRandomInteger(individual->dcLength); + uint bitInvSecond, tmp; + do + { + bitInvSecond = state_->getRandomizer()->getRandomInteger(individual->dcLength); + } while (bitInv == bitInvSecond); + + if (bitInv>bitInvSecond) + { + tmp = bitInv; + bitInv = bitInvSecond; + bitInvSecond = tmp; + } + // shorten the sequence to the specified maximum length + if (bitInvSecond - bitInv >= invMaxLength_){ + bitInvSecond = bitInv + (uint)invMaxLength_ - 1; + } + + ECF_LOG(state, 5, "Inverting the sequence in gene [" + uint2str(invGene) + "] from point (" + uint2str(bitInv) + ") to point (" + uint2str(bitInvSecond) + ")"); + // copy the selected sequence in reverse order + std::vector seq; + for (int j = (dcStart + bitInvSecond); j >= (int)(dcStart + bitInv); j--){ + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence with its inversion + uint seqIdx = 0; + for (uint j = (dcStart + bitInv); j <= (dcStart + bitInvSecond); j++){ + individual->at(j) = static_cast(new 
Tree::Node(seq.at(seqIdx++))); + } + ECF_LOG(state, 5, "Inverted individual: \n" + pool[i]->toString()); + } + + } +} + +void GEP::transpose(StateP state, const std::vector& pool){ + if(transISRate_ > 0) transposeIS(state, pool); + if(transRISRate_ > 0) transposeRIS(state, pool); + if(transGeneRate_ > 0) transposeGene(state, pool); + if (transDcRate_ > 0) transposeDc(state, pool); + +} + +void GEP::transposeIS(StateP state, const std::vector& pool){ + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= transISRate_) { + ECF_LOG(state, 5, "Transposing individual using IS transposition: \n" + pool.at(i)->toString()); + pool.at(i)->fitness->setInvalid(); + // first, choose a random gene in the individual + GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + uint invGene = state_->getRandomizer()->getRandomInteger(individual->genes); + uint geneOffset = invGene * (individual->geneLength); + + // next, choose two random points in the gene which delimit the copied sequence + uint bitSeq = state_->getRandomizer()->getRandomInteger(individual->headLength+individual->tailLength); + uint bitSeqSecond, tmp; + do + { + bitSeqSecond = state_->getRandomizer()->getRandomInteger(individual->headLength+individual->tailLength); + } while (bitSeq == bitSeqSecond); + + if (bitSeq > bitSeqSecond) + { + tmp = bitSeq; + bitSeq = bitSeqSecond; + bitSeqSecond = tmp; + } + + // shorten the sequence to the specified maximum length + if (bitSeqSecond - bitSeq >= transMaxLength_){ + bitSeqSecond = bitSeq + (uint) transMaxLength_ - 1; + } + + + // next, choose the random point in the head area in which the sequence will be inserted (root excluded) + uint bitPos = state_->getRandomizer()->getRandomInteger(1, individual->headLength - 1); + ECF_LOG(state, 5, "Transposing the sequence in gene [" + uint2str(invGene) + "] from point (" + uint2str(bitSeq) + ") to point (" + uint2str(bitSeqSecond) + ") 
into position (" + uint2str(bitPos) + ")"); + + // make a copy of the entire gene + std::vector seq; + for (uint j = geneOffset; j < (geneOffset + individual->headLength + individual->tailLength); j++){ + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence first with the transposon + uint seqIdx = bitSeq; + uint r; + for (r = (geneOffset+bitPos); r <= (geneOffset+bitPos+bitSeqSecond-bitSeq) && r < (geneOffset+individual->headLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + // now, copy the original elements from the transposition point until the end of the head + seqIdx = bitPos; + for (; r < (geneOffset+individual->headLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + ECF_LOG(state, 5, "Transposed individual: \n" + pool[i]->toString()); + } + } +} + +void GEP::transposeRIS(StateP state, const std::vector& pool){ + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= transRISRate_) { + ECF_LOG(state, 5, "Transposing individual using RIS transposition: \n" + pool.at(i)->toString()); + // first, choose a random gene in the individual + GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + uint invGene = state_->getRandomizer()->getRandomInteger(individual->genes); + uint geneOffset = invGene * (individual->geneLength); + + // next, choose a random point in the head and scan until a function is found or the end of the head is reached + uint bitSeq = state_->getRandomizer()->getRandomInteger(1, individual->headLength-1); + while (bitSeq < individual->headLength && !(individual->primitiveSet_->getFunctionByName(individual->at(bitSeq)->primitive_->getName()))){ + bitSeq++; + } + // do nothing if no function found + if (bitSeq >= individual->headLength){ + ECF_LOG(state, 5, "No function nodes found. 
Transposition aborted."); + continue; + } + pool.at(i)->fitness->setInvalid(); + // randomly choose a point between the first one and the end of the gene + uint bitSeqSecond, tmp; + do + { + bitSeqSecond = state_->getRandomizer()->getRandomInteger(bitSeq,individual->headLength+individual->tailLength - 1); + } while (bitSeq == bitSeqSecond); + + if (bitSeq > bitSeqSecond) + { + tmp = bitSeq; + bitSeq = bitSeqSecond; + bitSeqSecond = tmp; + } + + // shorten the sequence to the specified maximum length + if (bitSeqSecond - bitSeq >= transMaxLength_){ + bitSeqSecond = bitSeq + (uint) transMaxLength_ - 1; + } + + // transpose to the root + ECF_LOG(state, 5, "Transposing the sequence in gene [" + uint2str(invGene) + "] from point (" + uint2str(bitSeq) + ") to point (" + uint2str(bitSeqSecond) + ") into root"); + + // make a copy of the entire gene + std::vector seq; + for (uint j = geneOffset; j < (geneOffset + individual->headLength + individual->tailLength); j++){ + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence first with the transposon + uint seqIdx = bitSeq; + uint r; + for (r = geneOffset; r <= (geneOffset + bitSeqSecond - bitSeq) && r < (geneOffset + individual->headLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + // now, copy the original elements from the transposition point until the end of the head + seqIdx = 0; + for (; r < (geneOffset + individual->headLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + ECF_LOG(state, 5, "Transposed individual: \n" + pool[i]->toString()); + } + } +} + +void GEP::transposeGene(StateP state, const std::vector& pool){ + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= transGeneRate_) { + ECF_LOG(state, 5, "Transposing individual using gene transposition: \n" + pool.at(i)->toString()); + // first, choose a random gene in the individual + 
GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + if (individual->genes == 1){ + ECF_LOG(state, 5, "Gene transposition failed: genotype consists of one gene"); + return; + } + pool.at(i)->fitness->setInvalid(); + uint invGene = state_->getRandomizer()->getRandomInteger(1,individual->genes-1); + uint geneOffset = invGene * (individual->geneLength); + + ECF_LOG(state, 5, "Transposing gene (" + uint2str(invGene)+") to head"); + + // make separate copies of the transposon gene and the rest of the chromosome + std::vector seq, copy; + for (uint j = 0; j < individual->size(); j++){ + if (j >= geneOffset && j < (geneOffset + individual->geneLength)) + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + else + copy.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence first with the transposon + uint copyIdx = 0; + individual->clear(); + for (uint r = 0; r < seq.size(); r++){ + individual->push_back(static_cast(new Tree::Node(seq.at(r)))); + } + // now, copy the rest of the original elements + for (uint r = 0; r < copy.size(); r++){ + individual->push_back(static_cast(new Tree::Node(copy.at(r)))); + } + ECF_LOG(state, 5, "Transposed individual: \n" + pool[i]->toString()); + std::vector x; + } + } +} + +void GEP::transposeDc(StateP state, const std::vector& pool){ + // Do a test and check whether the Dc domain actually exists + GEPChromosome::GEPChromosome* test = (boost::static_pointer_cast (pool.at(0)->getGenotype())).get(); + if (test->dcLength < 1){ + ECF_LOG(state, 5, "ERCs not used in the chromosome. 
Dc transposition aborted."); + return; // Do nothing if ERCs are not used + } + for (uint i = 0; i < pool.size(); i++) { + if (state_->getRandomizer()->getRandomDouble() <= transDcRate_) { + pool.at(i)->fitness->setInvalid(); + ECF_LOG(state, 5, "Transposing the constant domain: \n" + pool.at(i)->toString()); + // first, choose a random gene in the individual + GEPChromosome::GEPChromosome* individual = (boost::static_pointer_cast (pool.at(i)->getGenotype())).get(); + uint invGene = state_->getRandomizer()->getRandomInteger(individual->genes); + uint geneOffset = invGene * (individual->geneLength); + uint dcStart = geneOffset + individual->headLength + individual->tailLength; + // next, choose two random points in the gene which delimit the copied sequence + uint bitSeq = state_->getRandomizer()->getRandomInteger(individual->dcLength); + uint bitSeqSecond, tmp; + do + { + bitSeqSecond = state_->getRandomizer()->getRandomInteger(individual->dcLength); + } while (bitSeq == bitSeqSecond); + + if (bitSeq > bitSeqSecond) + { + tmp = bitSeq; + bitSeq = bitSeqSecond; + bitSeqSecond = tmp; + } + + // shorten the sequence to the specified maximum length + if (bitSeqSecond - bitSeq >= transMaxLength_){ + bitSeqSecond = bitSeq + (uint)transMaxLength_ - 1; + } + + // next, choose the random point in the ERC area in which the sequence will be inserted + uint bitPos = state_->getRandomizer()->getRandomInteger(individual->dcLength); + ECF_LOG(state, 5, "Transposing the sequence in gene [" + uint2str(invGene) + "] from point (" + uint2str(bitSeq) + ") to point (" + uint2str(bitSeqSecond) + ") into position (" + uint2str(bitPos) + ")"); + + // make a copy of the entire Dc tail + std::vector seq; + for (uint j = dcStart; j < (dcStart + individual->dcLength); j++){ + seq.push_back(static_cast(new Tree::Node(individual->at(j)))); + } + // replace the original sequence first with the transposon + uint seqIdx = bitSeq; + uint r; + for (r = (dcStart + bitPos); r <= (dcStart + bitPos + 
bitSeqSecond - bitSeq) && r < (dcStart + individual->dcLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + // now, copy the original elements from the transposition point until the end of the Dc tail + seqIdx = bitPos; + for (; r < (dcStart + individual->dcLength); r++){ + individual->at(r) = static_cast(new Tree::Node(seq.at(seqIdx++))); + } + ECF_LOG(state, 5, "Transposed individual: \n" + pool[i]->toString()); + } + } +} \ No newline at end of file diff --git a/gep/AlgGEP.h b/gep/AlgGEP.h new file mode 100644 index 0000000..d39bfc4 --- /dev/null +++ b/gep/AlgGEP.h @@ -0,0 +1,65 @@ +#ifndef ALGGEP_H +#define ALGGEP_H + +#include "ECF/ECF_base.h" +#include "ECF/Algorithm.h" +#include "ECF/SelRandomOp.h" +#include "ECF/SelBestOp.h" +#include "ECF/SelFitnessProportionalOp.h" +#include "GEPChromosome.h" +/** +* \brief Generational algorithm with roulette wheel selection operator and unique operators and chromosome representation +* \ingroup algorithms serial +* +* This algorithm requires one GEPChromosome genotype. 
+* +* The algorithm flow: +* +\verbatim +single generation { +select individuals to form the new generation (fitness proportional selection operator); +create new generation (make copies); +noCrx = (deme size) * <crxRate> / 2; +repeat(<noCrx> times) { +randomly select two parents; +perform crossover, _replace_ parents with their children; +} +perform mutation on new generation; +perform transposition on new generation; +perform inversion on new generation +} +\endverbatim +* +*/ +class GEP : public Algorithm{ +public: + GEP(); + void registerParameters(StateP state); + bool initialize(StateP state); + bool advanceGeneration(StateP state, DemeP deme); + void invert(StateP state, const std::vector& pool); + void invertDc(StateP state, const std::vector& pool); + void transpose(StateP state, const std::vector& pool); + void transposeIS(StateP state, const std::vector& pool); + void transposeRIS(StateP state, const std::vector& pool); + void transposeGene(StateP state, const std::vector& pool); + void transposeDc(StateP state, const std::vector& pool); + +protected: + double crxRate_; //!< crossover rate + double selPressure_; //!< selection pressure + double invRate_; //!< inversion rate + double invDcRate_; //!< Constant domain inversion rate + double invMaxLength_; //!< inversion maximum sequence length + double transISRate_; //!< IS transposition rate + double transRISRate_; //!< RIS transposition rate + double transGeneRate_; //!< gene transposition rate + double transDcRate_; //!< constant domain transposition rate + double transMaxLength_; //!< maximum sequence transposition length + + SelRandomOpP selRandomOp; + SelBestOpP selBestOp; + SelFitnessProportionalOpP selFitPropOp; +}; +typedef boost::shared_ptr GEPP; +#endif // AlgGEP_h \ No newline at end of file diff --git a/gep/GEPChromosome.cpp b/gep/GEPChromosome.cpp new file mode 100644 index 0000000..f8abdd2 --- /dev/null +++ b/gep/GEPChromosome.cpp @@ -0,0 +1,700 @@ +#include "GEPChromosome.h" +namespace GEPChromosome{ + // 
mandatory: define name, other things as needed + GEPChromosome::GEPChromosome(){ + Genotype::name_ = "GEPChromosome"; + usesERC = false; + dcLength = 0; + staticLink = false; + } + + // mandatory: must provide copy method + GEPChromosome* GEPChromosome::copy(){ + GEPChromosome *newObject = new GEPChromosome(*this); + return newObject; + } + + // optional: declare crx operators (if not, no crossover will be performed) + std::vector GEPChromosome::getCrossoverOp() + { + std::vector crx; + crx.push_back(static_cast (new GEPChromosomeCrsOnePoint)); + crx.push_back(static_cast (new GEPChromosomeCrsTwoPoint)); + crx.push_back(static_cast (new GEPChromosomeCrsGene)); + return crx; + } + + // optional: declare mut operators (if not, no mutation will be performed) + std::vector GEPChromosome::getMutationOp() + { + std::vector mut; + mut.push_back(static_cast (new GEPChromosomeMutOp)); + mut.push_back(static_cast (new GEPChromosomeMutGauss)); + return mut; + } + + // optional: register any parameters + void GEPChromosome::registerParameters(StateP state) + { + registerParameter(state, "headlength", (voidP)(new uint(1)), ECF::UINT); + registerParameter(state, "genes", (voidP)(new uint(1)), ECF::UINT); + registerParameter(state, "functionset", (voidP)(new std::string), ECF::STRING); + registerParameter(state, "terminalset", (voidP)(new std::string), ECF::STRING); + registerParameter(state, "linkingfunctions", (voidP)(new std::string), ECF::STRING); + registerParameter(state, "linklength", (voidP)(new uint(1)), ECF::UINT); + } + + void GEPChromosome::generateChromosome(){ + Tree::Node* node; + for (uint i = 0; i < genes; i++){ + // Generate random primitives for the head (Functions + Terminals) + for (uint j = 0; j < headLength; j++) { + node = new Tree::Node(); + node->setPrimitive(primitiveSet_->getRandomPrimitive()); + this->push_back(static_cast(node)); + } + // Generate random terminals for the tail + for (uint j = 0; j < tailLength; j++) { + node = new Tree::Node(); + 
node->setPrimitive(primitiveSet_->getRandomTerminal()); + this->push_back(static_cast(node)); + } + // Generate ERCs for the Dc domain + for (uint j = 0; j < dcLength; j++){ + node = new Tree::Node(); + node->setPrimitive(ercSet_->getRandomTerminal()); + this->push_back(static_cast(node)); + } + } + // Set the homeotic gene (this controls the linking functions) + for (uint i = 0; i < linkHeadLength; i++){ + node = new Tree::Node(); + node->setPrimitive(linkFunctionSet_->getRandomPrimitive()); + this->push_back(static_cast(node)); + } + for (uint i = 0; i < linkTailLength; i++){ + node = new Tree::Node(); + node->setPrimitive(linkFunctionSet_->getRandomTerminal()); + this->push_back(static_cast(node)); + } + } + + // mandatory: build initial genotype structure + bool GEPChromosome::initialize(StateP state) + { + // 'homegep' is a Gep instance kept in the State; + // we use it to link the PrimitiveSet to it and store the parameters + GEPChromosome* homegep = (GEPChromosome*)state->getGenotypes()[genotypeId_].get(); + state_ = state; + + // if we are the first one to initialize + if (!homegep->primitiveSet_){ + initializeFirst(homegep); + } + // in any case, read parameters from homegep + this->primitiveSet_ = homegep->primitiveSet_; + this->linkFunctionSet_ = homegep->linkFunctionSet_; + this->ercSet_ = homegep->ercSet_; + this->headLength = homegep->headLength; + this->genes = homegep->genes; + this->tailLength = homegep->tailLength; + this->dcLength = homegep->dcLength; + this->geneLength = homegep->geneLength; + this->linkHeadLength = homegep->linkHeadLength; + this->linkTailLength = homegep->linkTailLength; + // generate the chromosome + generateChromosome(); + + return true; + } + + void GEPChromosome::initializeFirst(GEPChromosome* home){ + + // create and link PrimitiveSet to the hometree + if (home == NULL){ + return; + } + home->primitiveSet_ = static_cast (new Tree::PrimitiveSet); + home->primitiveSet_->initialize(state_); + this->primitiveSet_ = 
home->primitiveSet_; + + home->linkFunctionSet_ = static_cast (new Tree::PrimitiveSet); + home->linkFunctionSet_->initialize(state_); + this->linkFunctionSet_ = home->linkFunctionSet_; + + home->ercSet_ = static_cast (new Tree::PrimitiveSet); + home->ercSet_->initialize(state_); + this->ercSet_ = home->ercSet_; + + // read number of genes, store in hometree + voidP sptr = getParameterValue(state_, "genes"); + home->genes = *((uint*)sptr.get()); + + if (home->genes < 1) { + ECF_LOG_ERROR(state_, "Gep genotype: number of genes must be >=1"); + } + + // add user defined functions to primitiveSet + + for (int i = 0; i < (int)userFunctions_.size(); i++) { + primitiveSet_->mAllPrimitives_[userFunctions_[i]->getName()] = userFunctions_[i]; + } + + uint maxArg = 0; + uint tmpArg = 0; + // read function set from the configuration + sptr = getParameterValue(state_, "functionset"); + std::stringstream names; + std::string name; + names << *((std::string*) sptr.get()); + while (names >> name) { + if (!primitiveSet_->addFunction(name)) { + ECF_LOG_ERROR(state_, "Error: unknown function in function set (\'" + name + "\')!"); + throw(""); + } + tmpArg = primitiveSet_->getPrimitiveByName(name)->getNumberOfArguments(); + if (tmpArg > maxArg) + maxArg = tmpArg; + } + // read Gep head length, store in hometree + sptr = getParameterValue(state_, "headlength"); + home->headLength = *((uint*)sptr.get()); + + if (home->headLength < 1) { + ECF_LOG_ERROR(state_, "Gep genotype: length of head must be >= 1"); + } + + // now we can tell how long tail must be + home->tailLength = home->headLength * (maxArg - 1) + 1; + home->geneLength = home->headLength + home->tailLength; + + if (primitiveSet_->getFunctionSetSize() == 0) { + ECF_LOG_ERROR(state_, "Tree genotype: empty function set!"); + throw(""); + } + + // create and link the linking function set + // Mono-genic chromosomes should have a constant homeotic gene for the sake of simplicity (i.e., their homeotic gene shall always be "0") + // 
Multi-genic chromosomes can evolve their homeotic gene by default, unless a static linking configuration is specified (TO-DO) + + uint linkMaxArg = 0; + uint linkTmpArg = 0; + std::stringstream linkNames; + // read linking function set from the configuration + sptr = getParameterValue(state_, "linkingfunctions"); + linkNames << *((std::string*) sptr.get()); + while (linkNames >> name) { + if (!linkFunctionSet_->addFunction(name)) { + ECF_LOG_ERROR(state_, "Error: unknown function in linking function set (\'" + name + "\')!"); + throw(""); + } + linkTmpArg = linkFunctionSet_->getPrimitiveByName(name)->getNumberOfArguments(); + if (linkTmpArg > linkMaxArg) + linkMaxArg = linkTmpArg; + } + // read homeotic gene head length, store in hometree + sptr = getParameterValue(state_, "linklength"); + home->linkHeadLength = *((uint*)sptr.get()); + + if (home->linkHeadLength < 1) { + ECF_LOG_ERROR(state_, "Gep genotype: length of linking function gene head must be >= 1"); + } + + // now we can tell how long tail must be + home->linkTailLength = home->linkHeadLength * (linkMaxArg - 1) + 1; + + if (linkFunctionSet_->getFunctionSetSize() == 0) { + ECF_LOG_ERROR(state_, "GEP genotype: empty linking function set!"); + } + // Add "terminals" to the linking function set. 
These will be a prefix + integers from [0, # of genes] + for (uint i = 0; i < home->genes; i++){ + Tree::PrimitiveP geneTerminals = (Tree::PrimitiveP)(new Tree::Primitives::Terminal); + std::string geneTermStr = GEP_GENE_PREFIX; + geneTermStr += uint2str(i); + geneTerminals->setName(geneTermStr); + geneTerminals->initialize(state_); + linkFunctionSet_->addTerminal(geneTerminals); + } + // set default terminal type + Tree::Primitives::terminal_type currentType = Tree::Primitives::Double; + Tree::type_iter typeIter; + + // read terminal set from the configuration + + std::stringstream tNames; + sptr = getParameterValue(state_, "terminalset"); + tNames << *((std::string*) sptr.get()); + + while (tNames >> name) { + // read current terminal type (if set) + typeIter = primitiveSet_->mTypeNames_.find(name); + if (typeIter != primitiveSet_->mTypeNames_.end()) { + currentType = typeIter->second; + continue; + } + + // see if it's a user-defined terminal + /* + uint iTerminal = 0; + for (; iTerminal < userTerminals_.size(); iTerminal++) + if (userTerminals_[iTerminal]->getName() == name) + break; + if (iTerminal < userTerminals_.size()) { + primitiveSet_->addTerminal(userTerminals_[iTerminal]); + continue; + } + */ + // read ERC definition (if set) + // ERC's are defined as interval [x y] or set {a b c} + // If ERCs are requested by the user, we add the placeholder terminal '?' 
to primitiveSet_ + // We then add any ERCs to ercSet_ + + if (name[0] == '[' || name[0] == '{') { + + //if this is the first ERC range we encounter, add the placeholder and switch on the ERC flag + if (!usesERC){ + usesERC = true; + Tree::PrimitiveP placeholder = (Tree::PrimitiveP) (new Tree::Primitives::Terminal); + placeholder->setName("?"); + primitiveSet_->addTerminal(placeholder); + // If ERCs are used, the length of the Dc domain is the same as the tail length + home->dcLength = home->tailLength; + home->geneLength += home->dcLength; + } + + std::string ercValues = ""; + + // name this ERC (ERC's name is its value!) + Tree::PrimitiveP erc; + switch (currentType) { + case Tree::Primitives::Double: + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERCD); + ercValues = DBL_PREFIX; + break; + case Tree::Primitives::Int: + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + ercValues = INT_PREFIX; + break; + case Tree::Primitives::Bool: + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + ercValues = BOOL_PREFIX; + break; + case Tree::Primitives::Char: + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + ercValues = CHR_PREFIX; + break; + case Tree::Primitives::String: + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + ercValues = STR_PREFIX; + break; + } + + while (name[name.size() - 1] != ']' && name[name.size() - 1] != '}') { + ercValues += " " + name; + tNames >> name; + } + ercValues += " " + name; + erc->setName(ercValues); + erc->initialize(state_); + ercSet_->addTerminal(erc); + + continue; + } + + //read terminal of current type + Tree::PrimitiveP terminal; + switch (currentType) + { + case Tree::Primitives::Double: + terminal = (Tree::PrimitiveP) (new Tree::Primitives::Terminal); break; + case Tree::Primitives::Int: + terminal = (Tree::PrimitiveP) (new Tree::Primitives::TerminalT); break; + case Tree::Primitives::Bool: + terminal = (Tree::PrimitiveP) (new Tree::Primitives::TerminalT); break; + case Tree::Primitives::Char: + terminal = 
(Tree::PrimitiveP) (new Tree::Primitives::TerminalT); break; + case Tree::Primitives::String: + terminal = (Tree::PrimitiveP) (new Tree::Primitives::TerminalT); break; + } + + // if the 'name' can be identified as a value of the 'currentType', then it's a _constant terminal_ (of that value) + // otherwise, it's a regular terminal with 'name' + std::istringstream ss(name); + switch (currentType) + { + case Tree::Primitives::Double: + double dblValue; + ss >> dblValue; + if (ss.fail() == false) + terminal->setValue(&dblValue); + break; + case Tree::Primitives::Int: + int intValue; + ss >> intValue; + if (ss.fail() == false) + terminal->setValue(&intValue); + break; + case Tree::Primitives::Bool: + bool boolValue; + ss >> boolValue; + if (name == "true") + boolValue = true; + else if (name == "false") + boolValue = false; + if (ss.fail() == false || name == "true" || name == "false") { + if (boolValue) + name = "true"; + else + name = "false"; + terminal->setValue(&boolValue); + } + break; + case Tree::Primitives::Char: + char charValue; + ss >> charValue; + if (ss.fail() == false) + terminal->setValue(&charValue); + break; + case Tree::Primitives::String: + std::string stringValue; + ss >> stringValue; + if (ss.fail() == false) + terminal->setValue(&stringValue); + break; + } + terminal->setName(name); + primitiveSet_->addTerminal(terminal); + + } + + if (primitiveSet_->getTerminalSetSize() == 0) { + ECF_LOG_ERROR(state_, "Tree: Empty terminal set!"); + throw(""); + } + + } + + // mandatory: write to XMLNode + void GEPChromosome::write(XMLNode &xGEPChromosome) + { + xGEPChromosome = XMLNode::createXMLTopNode("GEPChromosome"); + std::stringstream sValue; + sValue << genes; + xGEPChromosome.addAttribute("genes", sValue.str().c_str()); + sValue.str(""); + sValue << headLength; + xGEPChromosome.addAttribute("headLength",sValue.str().c_str()); + sValue.str(""); + sValue << tailLength; + xGEPChromosome.addAttribute("tailLength", sValue.str().c_str()); + sValue.str(""); + 
sValue << linkHeadLength; + xGEPChromosome.addAttribute("linkLength", sValue.str().c_str()); + for (uint g = 0; g < genes; g++){ + sValue.str(""); + XMLNode xGene = XMLNode::createXMLTopNode("Gene"); + for (uint i = 0; i < this->geneLength; i++) { + sValue << this->at(g*(this->geneLength)+i)->primitive_->getName() << " "; + } + xGene.addText(sValue.str().c_str()); + xGEPChromosome.addChild(xGene); + } + // print homeotic gene + sValue.str(""); + XMLNode xCell = XMLNode::createXMLTopNode("Cell"); + uint cellOffset = this->genes * this->geneLength; + for (uint i = 0; i < this->linkHeadLength + this->linkTailLength; i++) { + sValue << this->at(cellOffset + i)->primitive_->getName() << " "; + } + xCell.addText(sValue.str().c_str()); + xGEPChromosome.addChild(xCell); + } + + // read from XMLNode + // mandatory if running parallel ECF or reading population from a milestone file + void GEPChromosome::read(XMLNode& xGEPChromosome) + { + this->clear(); + //this->primitiveSet_ = static_cast (new Tree::PrimitiveSet); + //this->primitiveSet_->initialize(state_); + XMLCSTR genesStr = xGEPChromosome.getAttribute("genes"); + uint size = str2uint(genesStr); + + XMLCSTR hlenStr = xGEPChromosome.getAttribute("headLength"); + uint headlen = str2uint(hlenStr); + + XMLCSTR tlenStr = xGEPChromosome.getAttribute("linkLength"); + uint linklen = str2uint(tlenStr); + // loop over genes + for (uint i = 0; i <= size; i++){ + XMLNode xGene = xGEPChromosome.getChildNode(i); + XMLCSTR tree = xGene.getText(); + std::stringstream stream; + stream << tree; + + if (i < size){ + std::string primitiveStr; + uint position = 0; + + for (uint iNode = 0; iNode < this->geneLength; iNode++) { + stream >> primitiveStr; + Tree::Node* node = new Tree::Node(); + + // 'regular' primitives + Tree::PrimitiveP prim = primitiveSet_->getPrimitiveByName(primitiveStr); + if (prim != Tree::PrimitiveP()) { + node->setPrimitive(prim); + this->push_back(static_cast(node)); + continue; + } + // ERCs + // (TODO: include user 
defined ERC types) + Tree::PrimitiveP erc; + std::string prefix = primitiveStr.substr(0, 2); + std::string value = primitiveStr.substr(2); + std::stringstream ss; + ss << value; + if (prefix == DBL_PREFIX) { + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERCD); + double v; + ss >> v; + erc->setValue(&v); + } + else if (prefix == INT_PREFIX) { + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + int v; + ss >> v; + erc->setValue(&v); + } + else if (prefix == BOOL_PREFIX) { + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + bool v; + ss >> v; + erc->setValue(&v); + } + else if (prefix == CHR_PREFIX) { + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + char v; + ss >> v; + erc->setValue(&v); + } + else if (prefix == STR_PREFIX) { + erc = (Tree::PrimitiveP)(new Tree::Primitives::ERC); + std::string v; + ss >> v; + erc->setValue(&v); + } + else { + ECF_LOG_ERROR(state_, "GEPChromosome genotype: undefined primitive (" + primitiveStr + ")!"); + throw(""); + } + erc->setName(primitiveStr); + node->primitive_ = erc; + this->push_back(static_cast(node)); + } + } + else{ // Deal with the "Cell" gene + std::string primitiveStr; + uint position = 0; + + for (uint iNode = 0; iNode < this->linkHeadLength+this->linkTailLength; iNode++) { + stream >> primitiveStr; + Tree::Node* node = new Tree::Node(); + // 'regular' primitives + Tree::PrimitiveP prim = linkFunctionSet_->getPrimitiveByName(primitiveStr); + if (prim != Tree::PrimitiveP()) { // if it is a linking function + node->setPrimitive(prim); + this->push_back(static_cast(node)); + continue; + } + else{ + ECF_LOG_ERROR(state_, "GEPChromosome genotype: undefined primitive (" + primitiveStr + ") for the Cell gene!"); + throw(""); + } + } + } + } + } + + Tree::Tree* GEPChromosome::toTree(uint gene){ + ECF_LOG(this->state_, 5, "Performing GEP -> Tree conversion..."); + + Tree::Tree* tree = new Tree::Tree(); + // Copy primitive set + tree->primitiveSet_ = this->primitiveSet_; + + uint geneOffset = 
gene*(this->geneLength); + uint ercIdx = geneOffset + this->headLength + this->tailLength; + uint ercCount = 0; + // Get root node arity + //geneOffset = 0; + uint i = geneOffset; + uint nArgs = this->at(i++)->primitive_->getNumberOfArguments(); + // Get tree level indices + std::vector idx; + int level = 0; + uint nextLevelStart = 1 + geneOffset; + idx.push_back(geneOffset); + while (nArgs > 0){ + uint lvlArity = 0; + idx.push_back(nextLevelStart); + for (uint j = 0; j < nArgs; j++){ + lvlArity += this->at(nextLevelStart++)->primitive_->getNumberOfArguments(); + } + nArgs = lvlArity; + } + //Read the gene and annotate the locations of the constants (needed later) + std::vector constants(this->size(), -99999); + if (this->dcLength > 0){ + for (uint c = geneOffset; c < geneOffset + this->headLength + this->tailLength; c++){ + if (this->at(c)->primitive_->getName() == "?"){ + constants[c] = ercCount++; + } + } + } + // Translate expression + // Helper array to store the per-level arguments needed + std::vector args(idx.size(), 0); + // Iterate while root node hasn't completed + std::vector visited(this->size(), false); + while (idx.at(0) == geneOffset){ + // Read this node and add it to the GP expression, if it hasn't been visited yet + if (!visited.at(idx.at(level))){ + Tree::NodeP GEPnode = static_cast (new Tree::Node(this->at(idx.at(level)))); + // If it is an ERC placeholder, replace with the next ERC + if (GEPnode->primitive_->getName() == "?"){ + GEPnode = static_cast (new Tree::Node(this->at(ercIdx+constants.at(idx.at(level))))); + } + args[level] = GEPnode->primitive_->getNumberOfArguments(); + // Push node into Tree representation + Tree::NodeP node = static_cast (new Tree::Node(GEPnode)); + tree->addNode(node); + visited.at(idx.at(level)) = true; + } + // If operator still needs children, go down one level and read the necessary arguments + if (args.at(level) > 0){ + level++; + } + // If it is a terminal or a satisfied operator, go up one level and increase reading 
index + else{ + idx[level]++; + level--; + // Decrease needed arguments + if (level >= 0) args[level]--; + } + } + // Update the size and depth of each tree node + tree->update(); + // Print tree + XMLNode xInd; + tree->write(xInd); + char *s = xInd.createXMLString(); + ECF_LOG(this->state_, 5, "Tree conversion result: \n" + std::string(s)); + freeXMLString(s); + return tree; + } + + Tree::Tree* GEPChromosome::makeCellTree(){ + ECF_LOG(this->state_, 5, "Performing GEP -> Tree conversion at the cell level..."); + + Tree::Tree* tree = new Tree::Tree(); + // Copy primitive set + tree->primitiveSet_ = this->linkFunctionSet_; + + uint geneOffset = this->genes*(this->geneLength); + // Get root node arity + uint i = geneOffset; + uint nArgs = this->at(i++)->primitive_->getNumberOfArguments(); + // Get tree level indices + std::vector idx; + int level = 0; + uint nextLevelStart = 1 + geneOffset; + idx.push_back(geneOffset); + while (nArgs > 0){ + uint lvlArity = 0; + idx.push_back(nextLevelStart); + for (uint j = 0; j < nArgs; j++){ + lvlArity += this->at(nextLevelStart++)->primitive_->getNumberOfArguments(); + } + nArgs = lvlArity; + } + // Translate expression + // Helper array to store the per-level arguments needed + std::vector args(idx.size(), 0); + // Iterate while root node hasn't completed + std::vector visited(this->size(), false); + while (idx.at(0) == geneOffset){ + // Read this node and add it to the GP expression, if it hasn't been visited yet + if (!visited.at(idx.at(level))){ + Tree::NodeP GEPnode = static_cast (new Tree::Node(this->at(idx.at(level)))); + args[level] = GEPnode->primitive_->getNumberOfArguments(); + // Push node into Tree representation + Tree::NodeP node = static_cast (new Tree::Node(GEPnode)); + tree->addNode(node); + visited.at(idx.at(level)) = true; + } + // If operator still needs children, go down one level and read the necessary arguments + if (args.at(level) > 0){ + level++; + } + // If it is a terminal or a satisfied operator, go up one level 
and increase reading index + else{ + idx[level]++; + level--; + // Decrease needed arguments + if (level >= 0) args[level]--; + } + } + // Update the size and depth of each tree node + tree->update(); + // Print tree + XMLNode xInd; + tree->write(xInd); + char *s = xInd.createXMLString(); + ECF_LOG(this->state_, 5, "Tree conversion result: \n" + std::string(s)); + freeXMLString(s); + return tree; + } + void GEPChromosome::assemble(){ + this->subtrees.clear(); + this->cellTree = this->makeCellTree(); + for (uint i = 0; i < this->genes; i++){ + Tree::Tree *subtree = this->toTree(i); + this->subtrees.push_back(subtree); + } + } + + void GEPChromosome::execute(void *result){ + // Obtain the cell tree structure + //Tree::Tree *tree = this->cellTree(); + // Translate and execute all the gene subtrees + // TODO: detect which genes are actually used so as to not evaluate unneeded ones + double tmp = 0; + for (uint i = 0; i < this->genes; i++){ + Tree::Tree *subtree = this->subtrees.at(i); + subtree->execute(&tmp); + // Set the terminal values according to the subtrees + this->cellTree->setTerminalValue(GEP_GENE_PREFIX + uint2str(i), &tmp); + } + // Finally, translate the cell tree and store the result + this->cellTree->execute(result); + } + + /** + * \brief Set a terminal's value. 
+ * + * \param name terminal's name + * \param value terminal's value + */ + void GEPChromosome::setTerminalValue(std::string name, void* value) + { + Tree::PrimitiveP term = primitiveSet_->getTerminalByName(name); + if (term == Tree::PrimitiveP()) { + ECF_LOG_ERROR(state_, "GEPChromosome genotype: invalid terminal name referenced in setTerminalValue()!"); + throw(""); + } + + term->setValue(value); + } +} + diff --git a/gep/GEPChromosome.h b/gep/GEPChromosome.h new file mode 100644 index 0000000..643bc73 --- /dev/null +++ b/gep/GEPChromosome.h @@ -0,0 +1,70 @@ +#ifndef GEPCHROMOSOME_H +#define GEPCHROMOSOME_H + +#include +#include "GEPChromosomeCrsOnePoint.h" +#include "GEPChromosomeCrsTwoPoint.h" +#include "GEPChromosomeCrsGene.h" +#include "GEPChromosomeMutOp.h" +#include "GEPChromosomeMutGauss.h" +#include +#define GEP_GENE_PREFIX "GENE_" +namespace GEPChromosome{ + + /** + * \defgroup gep GEP + * \ingroup genotypes + */ + /** + * \ingroup genotypes gep + * \brief GEPChromosome class - implements genotype as a Gene Expression Programming chromosome. + * + * This representation consists of one or more genes, each consisting of two main parts and an optional third component. + * The first part is called the Head. It has a size specified by the user and may contain any primitive. + * The second part is called the Tail. It may contain only terminals and has a fixed size (depending on the maximum arity of the primitive set and the head length). These two properties, along with the expression procedure, guarantee that every GEP chromosome encodes a valid expression. + * The optional third part is called the "Dc domain". It contains the values of random constants and has the same size as the Tail. If used, a constant placeholder primitive "?" is added to the primitive set. + * GEPChromosome inherits a fixed-size vector of Node objects. Each node points to a Primitive object. + * Nodes in tree are aligned in a breadth-first notation. 
+ * Being fixed-size and breadth-first-expressed, this means that non-coding neutral node regions can exist in any given individual. + * Some primitives are shared among all the trees, but some (such as ERC primitives) belong to a single GEPChromosome. + */ + class GEPChromosome : public std::vector, public Genotype + { + public: + StateP state_; + std::vector userFunctions_; // programmatically added functions + std::vector userTerminals_; // programmatically added terminals + uint headLength; ///< length of the head. User-specified + uint tailLength; ///< length of the tail. Automatically calculated. + uint dcLength; ///< length of the constant values domain + uint geneLength; ///< total length of each gene + uint linkHeadLength; ///< length of the linking function gene's head + uint linkTailLength; ///< length of the linking function gene's tail + uint genes; ///< number of genes + Tree::PrimitiveSetP primitiveSet_; // contains all functions, terminals and the ERC placeholder (if needed) + Tree::PrimitiveSetP linkFunctionSet_; // contains all linking functions and genic terminals (integers) + Tree::PrimitiveSetP ercSet_; // contains only the ERC ranges + bool usesERC; ///< whether or not the chromosome uses random constants + bool staticLink; ///< whether we are using a static linking function or if it should be allowed to evolve + Tree::Tree *cellTree; + std::vector subtrees; + + GEPChromosome(); + GEPChromosome* copy(); + std::vector getCrossoverOp(); + std::vector getMutationOp(); + void registerParameters(StateP state); + bool initialize(StateP state); + void initializeFirst(GEPChromosome* home); + void write(XMLNode &xGEPChromosome); + void read(XMLNode& xGEPChromosomeInd); + void execute(void*); + Tree::Tree* toTree(uint gene); + Tree::Tree* makeCellTree(); + void assemble(); + void generateChromosome(); + void setTerminalValue(std::string name, void* value); + }; +} +typedef boost::shared_ptr GEPChromosomeP; +#endif // GEPChromosome_h \ No newline at end of 
file
diff --git a/gep/GEPChromosomeCrsGene.cpp b/gep/GEPChromosomeCrsGene.cpp
new file mode 100644
index 0000000..e5bab9b
--- /dev/null
+++ b/gep/GEPChromosomeCrsGene.cpp
@@ -0,0 +1,53 @@
+#include "GEPChromosomeCrsGene.h"
+namespace GEPChromosome{
+    void GEPChromosomeCrsGene::registerParameters(StateP state) // registers the "crx.gene" double parameter (default 0)
+    {
+        myGenotype_->registerParameter(state, "crx.gene", (voidP) new double(0), ECF::DOUBLE);
+    }
+
+
+    bool GEPChromosomeCrsGene::initialize(StateP state) // loads "crx.gene" into probability_; always returns true
+    {
+        voidP sptr = myGenotype_->getParameterValue(state, "crx.gene");
+        probability_ = *((double*)sptr.get());
+        return true;
+    }
+
+    bool GEPChromosomeCrsGene::mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child){ // rebuilds child from the two parents; always returns true
+        // get the genotype from the parents and child (unchecked casts - all three are assumed to be GEPChromosome)
+        GEPChromosome* p1 = (GEPChromosome*)(gen1.get());
+        GEPChromosome* p2 = (GEPChromosome*)(gen2.get());
+        GEPChromosome* ch = (GEPChromosome*)(child.get());
+        // test whether gene crossover is viable (i.e., there is more than one gene); if not, the child is left untouched
+        if (p1->genes < 2){
+            ECF_LOG(state_, 5, "Gene crossover failed: chromosome consists of fewer than 2 genes");
+            return true;
+        }
+        ch->clear();
+        // select the gene number to be swapped
+        uint geneCrs = state_->getRandomizer()->getRandomInteger(0, (int)p1->genes); // NOTE(review): bound is genes, not genes - 1; index == genes is the linking region copied last below - confirm intended
+        uint geneLen = p1->geneLength;
+        ECF_LOG(state_, 5, "Performing Gene crossover in gene ("+ uint2str(geneCrs) + ")...");
+        // build the child region by region: a random 0/1 draw picks the base parent, the selected region comes from the other one
+        uint jStop;
+        switch (state_->getRandomizer()->getRandomInteger(0, 1)) {
+        case 0: // base p1, selected region from p2
+            for (uint i = 0; i <= ch->genes; i++) {
+                jStop = (i == ch->genes) ? ch->linkHeadLength + ch->linkTailLength : geneLen; // last pass copies the linking head + tail instead of a gene
+                for (uint j = 0; j < jStop; j++){
+                    ch->push_back(static_cast (new Tree::Node(i == geneCrs ? p2->at(i*geneLen + j) : p1->at(i*geneLen + j))));
+                }
+            }
+            break;
+        case 1: // base p2, selected region from p1
+            for (uint i = 0; i <= ch->genes; i++) {
+                jStop = (i == ch->genes) ? ch->linkHeadLength + ch->linkTailLength : geneLen; // last pass copies the linking head + tail instead of a gene
+                for (uint j = 0; j < jStop; j++){
+                    ch->push_back(static_cast (new Tree::Node(i == geneCrs ? p1->at(i*geneLen + j) : p2->at(i*geneLen + j))));
+                }
+            }
+        }
+
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/gep/GEPChromosomeCrsGene.h b/gep/GEPChromosomeCrsGene.h
new file mode 100644
index 0000000..e8cb1c5
--- /dev/null
+++ b/gep/GEPChromosomeCrsGene.h
@@ -0,0 +1,21 @@
+#ifndef GEPCHROMOSOMECRSGENE_H
+#define GEPCHROMOSOMECRSGENE_H
+#include "ecf/ECF_base.h"
+#include "GEPChromosome.h"
+namespace GEPChromosome
+{
+    /**
+    * \ingroup genotypes gep
+    * \brief GEPChromosome genotype: gene crx operator.
+    * Selects a gene number and builds the child from one parent with that gene taken from the other parent.
+    */
+
+    class GEPChromosomeCrsGene : public CrossoverOp
+    {
+    public:
+        bool initialize(StateP);
+        void registerParameters(StateP);
+        bool mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child);
+    };
+}
+#endif // GEPChromosomeCrsGene_h
\ No newline at end of file
diff --git a/gep/GEPChromosomeCrsOnePoint.cpp b/gep/GEPChromosomeCrsOnePoint.cpp
new file mode 100644
index 0000000..239a388
--- /dev/null
+++ b/gep/GEPChromosomeCrsOnePoint.cpp
@@ -0,0 +1,44 @@
+#include "GEPChromosomeCrsOnePoint.h"
+namespace GEPChromosome{
+    void GEPChromosomeCrsOnePoint::registerParameters(StateP state) // registers the "crx.onepoint" double parameter (default 0)
+    {
+        myGenotype_->registerParameter(state, "crx.onepoint", (voidP) new double(0), ECF::DOUBLE);
+    }
+
+
+    bool GEPChromosomeCrsOnePoint::initialize(StateP state) // loads "crx.onepoint" into probability_; always returns true
+    {
+        voidP sptr = myGenotype_->getParameterValue(state, "crx.onepoint");
+        probability_ = *((double*)sptr.get());
+        return true;
+    }
+
+    bool GEPChromosomeCrsOnePoint::mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child){ // rebuilds child from the two parents; always returns true
+        // get the genotype from the parents and the child (unchecked casts - all three are assumed to be GEPChromosome)
+        GEPChromosome* p1 = (GEPChromosome*)(gen1.get());
+        GEPChromosome* p2 = (GEPChromosome*)(gen2.get());
+        GEPChromosome* ch = (GEPChromosome*)(child.get());
+        ch->clear();
+        // select a point on which the crossover operator will be applied
+        uint bitCrs = state_->getRandomizer()->getRandomInteger(0, (int)p1->size() - 1);
+        ECF_LOG(state_, 5, "Performing One-Pt crossover at node (" + uint2str(bitCrs) + ")...");
+        // Copy the nodes from one parent until the crossover point, then the nodes from the other parent until the end
+        switch (state_->getRandomizer()->getRandomInteger(0, 1)) {
+        case 0: for (uint i = 0; i < bitCrs; i++) { // p1 before the point, p2 from the point on
+                    ch->push_back(static_cast (new Tree::Node((p1->at(i)))));
+                }
+                for (uint i = bitCrs; i < p2->size(); i++) {
+                    ch->push_back(static_cast (new Tree::Node((p2->at(i)))));
+                }
+                break;
+        case 1: for (uint i = 0; i < bitCrs; i++) { // p2 before the point, p1 from the point on
+                    ch->push_back(static_cast (new Tree::Node((p2->at(i)))));
+                }
+                for (uint i = bitCrs; i < p1->size(); i++) {
+                    ch->push_back(static_cast (new Tree::Node((p1->at(i)))));
+                }
+        }
+
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/gep/GEPChromosomeCrsOnePoint.h b/gep/GEPChromosomeCrsOnePoint.h
new file mode 100644
index 0000000..9e68610
--- /dev/null
+++ b/gep/GEPChromosomeCrsOnePoint.h
@@ -0,0 +1,21 @@
+#ifndef GEPCHROMOSOMECRSONEPOINT_H
+#define GEPCHROMOSOMECRSONEPOINT_H
+#include "ecf/ECF_base.h"
+#include "GEPChromosome.h"
+namespace GEPChromosome
+{
+    /**
+    * \ingroup genotypes gep
+    * \brief GEPChromosome genotype: one point crx operator.
+    * Selects a crossing point from which to exchange genetic information.
+    */
+
+    class GEPChromosomeCrsOnePoint : public CrossoverOp
+    {
+    public:
+        bool initialize(StateP);
+        void registerParameters(StateP);
+        bool mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child);
+    };
+}
+#endif // GEPChromosomeCrsOnePoint_h
\ No newline at end of file
diff --git a/gep/GEPChromosomeCrsTwoPoint.cpp b/gep/GEPChromosomeCrsTwoPoint.cpp
new file mode 100644
index 0000000..7c54888
--- /dev/null
+++ b/gep/GEPChromosomeCrsTwoPoint.cpp
@@ -0,0 +1,78 @@
+#include "GEPChromosomeCrsTwoPoint.h"
+namespace GEPChromosome{
+    // Registers the "crx.twopoint" parameter (a double, default 0) on the owning genotype.
+    void GEPChromosomeCrsTwoPoint::registerParameters(StateP state)
+    {
+        myGenotype_->registerParameter(state, "crx.twopoint", (voidP) new double(0), ECF::DOUBLE);
+    }
+
+
+    // Reads the "crx.twopoint" parameter into probability_; always returns true.
+    bool GEPChromosomeCrsTwoPoint::initialize(StateP state)
+    {
+        voidP sptr = myGenotype_->getParameterValue(state, "crx.twopoint");
+        probability_ = *((double*)sptr.get());
+        return true;
+    }
+
+    // Two-point crossover: the child takes the nodes between two distinct cut points
+    // from one parent and everything outside them from the other. A random 0/1 draw
+    // decides which parent supplies the outer part. Always returns true; parents with
+    // fewer than 2 nodes leave the child untouched.
+    bool GEPChromosomeCrsTwoPoint::mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child){
+        // get the genotypes from the parents and child
+        GEPChromosome* p1 = (GEPChromosome*)(gen1.get());
+        GEPChromosome* p2 = (GEPChromosome*)(gen2.get());
+        GEPChromosome* ch = (GEPChromosome*)(child.get());
+        // two distinct cut points need at least two nodes; without this check the
+        // do/while below could never draw a second point different from the first
+        if (p1->size() < 2){
+            ECF_LOG(state_, 5, "Two-Pt crossover failed: chromosome consists of fewer than 2 nodes");
+            return true;
+        }
+        ch->clear();
+        // select two points between which to perform the crossover
+        uint bitCrs = state_->getRandomizer()->getRandomInteger(0, (int)p1->size() - 1);
+        uint bitCrsSecond, tmp;
+        do
+        {
+            bitCrsSecond = state_->getRandomizer()->getRandomInteger(p1->size());
+        } while (bitCrs == bitCrsSecond);
+
+        // order the points so that bitCrs < bitCrsSecond
+        if (bitCrs>bitCrsSecond)
+        {
+            tmp = bitCrs;
+            bitCrs = bitCrsSecond;
+            bitCrsSecond = tmp;
+        }
+        ECF_LOG(state_, 5, "Performing Two-Pt crossover at nodes (" + uint2str(bitCrs) + ") and (" + uint2str(bitCrsSecond) + ")...");
+        // swap the region delimited by the chosen points between the parents
+        // (the tail loop is bounded by the size of the parent it reads from)
+        switch (state_->getRandomizer()->getRandomInteger(0, 1)) {
+        case 0:
+            for (uint i = 0; i < bitCrs; i++) {
+                ch->push_back(static_cast (new Tree::Node((p1->at(i)))));
+            }
+            for (uint i = bitCrs; i < bitCrsSecond; i++) {
+                ch->push_back(static_cast (new Tree::Node((p2->at(i)))));
+            }
+            for (uint i = bitCrsSecond; i < p1->size(); i++) {
+                ch->push_back(static_cast (new Tree::Node((p1->at(i)))));
+            }
+            break;
+        case 1:
+            for (uint i = 0; i < bitCrs; i++) {
+                ch->push_back(static_cast (new Tree::Node((p2->at(i)))));
+            }
+            for (uint i = bitCrs; i < bitCrsSecond; i++) {
+                ch->push_back(static_cast (new Tree::Node((p1->at(i)))));
+            }
+            for (uint i = bitCrsSecond; i < p2->size(); i++) {
+                ch->push_back(static_cast (new Tree::Node((p2->at(i)))));
+            }
+        }
+
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/gep/GEPChromosomeCrsTwoPoint.h b/gep/GEPChromosomeCrsTwoPoint.h
new file mode 100644
index 0000000..c1e3242
--- /dev/null
+++ b/gep/GEPChromosomeCrsTwoPoint.h
@@ -0,0 +1,21 @@
+#ifndef GEPCHROMOSOMECRSTWOPOINT_H
+#define GEPCHROMOSOMECRSTWOPOINT_H
+#include "ecf/ECF_base.h"
+#include "GEPChromosome.h"
+namespace GEPChromosome
+{
+    /**
+    * \ingroup genotypes gep
+    * \brief GEPChromosome genotype: two point crx operator.
+    * Selects two crossing points between which to exchange genetic information.
+    */
+
+    class GEPChromosomeCrsTwoPoint : public CrossoverOp
+    {
+    public:
+        bool initialize(StateP);
+        void registerParameters(StateP);
+        bool mate(GenotypeP gen1, GenotypeP gen2, GenotypeP child);
+    };
+}
+#endif // GEPChromosomeCrsTwoPoint_h
\ No newline at end of file
diff --git a/gep/GEPChromosomeMutGauss.cpp b/gep/GEPChromosomeMutGauss.cpp
new file mode 100644
index 0000000..51eb400
--- /dev/null
+++ b/gep/GEPChromosomeMutGauss.cpp
@@ -0,0 +1,54 @@
+#include "GEPChromosomeMutGauss.h"
+
+namespace GEPChromosome{
+    void GEPChromosomeMutGauss::registerParameters(StateP state) // registers the "mut.gauss" double parameter (default 0)
+    {
+        myGenotype_->registerParameter(state, "mut.gauss", (voidP) new double(0), ECF::DOUBLE);
+    }
+
+    bool GEPChromosomeMutGauss::initialize(StateP state) // loads "mut.gauss" into probability_ and seeds engine_; always returns true
+    {
+        voidP sptr = myGenotype_->getParameterValue(state, "mut.gauss");
+        probability_ = *((double*)sptr.get());
+        engine_.seed((uint)time(NULL)); // seeded from the wall clock rather than from state's randomizer
+        return true;
+    }
+
+    bool GEPChromosomeMutGauss::mutate(GenotypeP gene){ // always returns true; does nothing when dcLength is 0
+        GEPChromosome* chr = (GEPChromosome*)(gene.get());
+        if (chr->dcLength > 0){
+            // mutate a single random point in the Dc domain by adding Gaussian noise to it
+            // Select a random gene
+            uint iGene = state_->getRandomizer()->getRandomInteger(0, chr->genes - 1);
+            uint geneOffset = iGene * chr->geneLength;
+            uint constOffset = geneOffset + chr->headLength + chr->tailLength; // this is where the Dc of the selected gene begins
+            // Select a random point in the Dc tail
+            uint iPoint = constOffset + state_->getRandomizer()->getRandomInteger(0, (uint)(chr->dcLength) - 1);
+
+            double oldValue;
+            Tree::PrimitiveP oldPrim = chr->at(iPoint)->primitive_;
+            chr->at(iPoint)->primitive_->getValue(&oldValue);
+            std::string oldName = chr->at(iPoint)->primitive_->getName();
+
+            // generate Gauss noise offset and add it
+            boost::normal_distribution N(0, 1);
+            double offset = N.operator () < boost::lagged_fibonacci607 > (engine_);
+            double newValue = oldValue + offset;
+
+            // change double ERC value and name (the new name is DBL_PREFIX followed by the streamed value)
+            std::stringstream ss;
+            ss << newValue;
+            std::string newName;
+            ss >> newName;
+            newName = DBL_PREFIX + newName;
+
+            oldPrim->setName(newName); // the primitive object is updated in place; NOTE(review): if it is shared by several nodes they all change - confirm
+            oldPrim->setValue(&newValue);
+            // print result
+            std::stringstream log;
+            log << "GEPMutGauss successful (oldNode = " << oldName << ", newNode = " << newName << ")";
+            ECF_LOG(state_, 5, log.str());
+        }
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/gep/GEPChromosomeMutGauss.h b/gep/GEPChromosomeMutGauss.h
new file mode 100644
index 0000000..f19e5ba
--- /dev/null
+++ b/gep/GEPChromosomeMutGauss.h
@@ -0,0 +1,24 @@
+#ifndef GEPCHROMOSOMEMUTGAUSS_H
+#define GEPCHROMOSOMEMUTGAUSS_H
+#include "ecf/ECF_base.h"
+#include
+#include "GEPChromosome.h"
+#include "boost/random/normal_distribution.hpp"
+#include "boost/random/lagged_fibonacci.hpp"
+namespace GEPChromosome{
+    /**
+    * \ingroup genotypes gep
+    * \brief GEPChromosome genotype: standard normal distribution noise mutation operator.
+    * Applicable only on ephemeral random constants (ERC) of type 'double'.
+    */
+    class GEPChromosomeMutGauss : public MutationOp
+    {
+    public:
+        void registerParameters(StateP state);
+        bool initialize(StateP state);
+        bool mutate(GenotypeP gene);
+    protected:
+        boost::lagged_fibonacci607 engine_; // generator handed to the normal distribution in mutate()
+    };
+}
+#endif // GEPChromosomeMutGauss_h
\ No newline at end of file
diff --git a/gep/GEPChromosomeMutOp.cpp b/gep/GEPChromosomeMutOp.cpp
new file mode 100644
index 0000000..f2f2bbf
--- /dev/null
+++ b/gep/GEPChromosomeMutOp.cpp
@@ -0,0 +1,32 @@
+#include "GEPChromosomeMutOp.h"
+namespace GEPChromosome{
+    void GEPChromosomeMutOp::registerParameters(StateP state) // registers the "mut.simple" double parameter (default 0)
+    {
+        myGenotype_->registerParameter(state, "mut.simple", (voidP) new double(0), ECF::DOUBLE);
+    }
+
+
+    bool GEPChromosomeMutOp::initialize(StateP state) // loads "mut.simple" into probability_; always returns true
+    {
+        voidP sptr = myGenotype_->getParameterValue(state, "mut.simple");
+        probability_ = *((double*)sptr.get());
+
+        return true;
+    }
+    bool GEPChromosomeMutOp::mutate(GenotypeP gene){ // replaces the primitive of one random head/tail node; always returns true
+        GEPChromosome* chr = (GEPChromosome*)(gene.get());
+        // mutate a single random point in the genotype by selecting a random primitive
+        // Select a random gene
+        uint iGene = state_->getRandomizer()->getRandomInteger(0, chr->genes - 1);
+        uint geneOffset = iGene * chr->geneLength;
+        // Select a random point in the head+tail (iPoint is relative to the start of the gene)
+        uint iPoint = state_->getRandomizer()->getRandomInteger(0, (uint)(chr->headLength+chr->tailLength) - 1);
+        std::stringstream logstr;
+        logstr << "Mutating node (" << iPoint << ") in gene [" << iGene << "]...";
+        ECF_LOG(state_, 5, logstr.str());
+        // get primitive depending on where the selected point is
+        // head points can change into anything; tail points must be terminals
+        chr->at(geneOffset + iPoint)->setPrimitive(iPoint < chr->headLength ? chr->primitiveSet_->getRandomPrimitive() : chr->primitiveSet_->getRandomTerminal());
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/gep/GEPChromosomeMutOp.h b/gep/GEPChromosomeMutOp.h
new file mode 100644
index 0000000..f71a0cf
--- /dev/null
+++ b/gep/GEPChromosomeMutOp.h
@@ -0,0 +1,19 @@
+#ifndef GEPCHROMOSOMEMUTOP_H
+#define GEPCHROMOSOMEMUTOP_H
+#include "ecf/ECF_base.h"
+#include "GEPChromosome.h"
+namespace GEPChromosome{
+    /**
+    * \ingroup genotypes gep
+    * \brief GEPChromosome genotype: node replacement mutation operator.
+    * Replaces the selected primitive with a randomly drawn one (not checked to differ from the current one), conforming to the restrictions of the domain where it is located.
+    */
+    class GEPChromosomeMutOp : public MutationOp
+    {
+    public:
+        void registerParameters(StateP state);
+        bool initialize(StateP state);
+        bool mutate(GenotypeP gene);
+    };
+}
+#endif // GEPChromosomeMutOp_h
\ No newline at end of file
diff --git a/implicit_functions/SymbolicRegressionUtil.cpp b/implicit_functions/SymbolicRegressionUtil.cpp index 6f83b95..cace3db 100644 --- a/implicit_functions/SymbolicRegressionUtil.cpp +++ b/implicit_functions/SymbolicRegressionUtil.cpp @@ -2,6 +2,7 @@ // Created by josipmrden on 29. 03. 2020..
// +#include #include "SymbolicRegressionUtil.h" //"- 25 + * X X + * Y Y * Z Z"; - how to write a solution diff --git a/implicit_functions/SymbolicRegressionUtil.h b/implicit_functions/SymbolicRegressionUtil.h index e9e1af2..86c7938 100644 --- a/implicit_functions/SymbolicRegressionUtil.h +++ b/implicit_functions/SymbolicRegressionUtil.h @@ -23,7 +23,9 @@ bool containsAllVariables(Tree::Tree* tree, vector variables); bool containsAllVariables(vector trees, vector variables); vector generateRandomDataset(StateP state, int sampleSize, int varSize); + bool isLowStdevOnRandomValues(Tree::Tree* tree, StateP state, vector variables); + vector getStdevsFromPoints(Tree::Tree* firstTree, Tree::Tree* secondTree, vector points, vector> variables); double executeTree(Tree::Tree* tree, vector variables, Point point); diff --git a/implicit_functions/cross_validation/InstanceRunner.cpp b/implicit_functions/cross_validation/InstanceRunner.cpp index 92bb081..22b02ba 100644 --- a/implicit_functions/cross_validation/InstanceRunner.cpp +++ b/implicit_functions/cross_validation/InstanceRunner.cpp @@ -53,24 +53,27 @@ void InstanceRunner::runInstance(ParameterSet parameterSet) string datasetParameterLogDir = datasetLogDir + "/" + parametersName; createDirectory(datasetParameterLogDir); - string paretoFrontFilename = datasetParameterLogDir + "/paretoFront.txt"; - auto* paretoFrontier = new ParetoFrontier(paretoFrontFilename); - - for (int j = 0; j < _noRuns; j++) - { + for (int j = 0; j < _noRuns; j++) { + string paretoFrontFilename = datasetParameterLogDir + "/paretoFront" + to_string(j+1) + ".txt"; + auto* paretoFrontier = new ParetoFrontier(paretoFrontFilename); _evaluateOp = _evaluateOp->createNew(); _evaluateOp->_datasetFileName = dataset.fileName; _evaluateOp->_paretoFrontier = paretoFrontier; - string runLogDir = datasetParameterLogDir + "/" + to_string(j+1); + string runLogDir = datasetParameterLogDir + "/" + to_string(j + 1); string batchStatsString = runLogDir + "/stats.txt"; 
string logFilenameString = runLogDir + "/log.txt"; string milestoneFilenameString = runLogDir + "/m.txt"; - + cout << runLogDir << endl; createDirectory(runLogDir); - StateP state (new State); - TreeP tree (new Tree::Tree); + StateP state(new State); + //state->addAlgorithm((GEPP)new GEP()); + //GEPChromosome::GEPChromosome *chromosome = new GEPChromosome::GEPChromosome(); + //state->addGenotype((GEPChromosomeP) chromosome); + TreeP tree(new Tree::Tree); + state->addGenotype(tree); + Tree::PrimitiveP exp(new Exp); tree->addFunction(exp); Tree::PrimitiveP sqrt(new Sqrt); @@ -79,14 +82,22 @@ void InstanceRunner::runInstance(ParameterSet parameterSet) tree->addFunction(square); Tree::PrimitiveP tanh(new Tanh); tree->addFunction(tanh); - state->addGenotype(tree); + Tree::PrimitiveP ngt(new Negated); + tree->addFunction(ngt); + /* + chromosome->userFunctions_.push_back(exp); + chromosome->userFunctions_.push_back(sqrt); + chromosome->userFunctions_.push_back(square); + chromosome->userFunctions_.push_back(tanh); + chromosome->userFunctions_.push_back(ngt); + */ _state = state; _state->setEvalOp(_evaluateOp); _state->initialize(argc, argv); - for (int treeIndex = 0; treeIndex < _evaluateOp->noTrees; treeIndex++) - { + + for (int treeIndex = 0; treeIndex < _evaluateOp->noTrees; treeIndex++) { _state->getGenotypes()[treeIndex]->setParameterValue( _state, "terminalset", @@ -101,12 +112,12 @@ void InstanceRunner::runInstance(ParameterSet parameterSet) _state->getRegistry()->modifyEntry("mutation.indprob", (voidP) &parameterSet.mutationProbablity); _state->getRegistry()->modifyEntry("batch.statsfile", (voidP) new string(batchStatsString)); - _state->getRegistry()->modifyEntry("log.filename", (voidP) new string (logFilenameString)); - _state->getRegistry()->modifyEntry("milestone.filename", (voidP) new string (milestoneFilenameString)); + _state->getRegistry()->modifyEntry("log.filename", (voidP) new string(logFilenameString)); + 
_state->getRegistry()->modifyEntry("milestone.filename", (voidP) new string(milestoneFilenameString)); _state->getPopulation()->initialize(_state); + _state->run(); + paretoFrontier->writeParetoFront(); } - - paretoFrontier->writeParetoFront(); } } \ No newline at end of file diff --git a/implicit_functions/cross_validation/InstanceRunner.h b/implicit_functions/cross_validation/InstanceRunner.h index c58b30a..9ad51a4 100644 --- a/implicit_functions/cross_validation/InstanceRunner.h +++ b/implicit_functions/cross_validation/InstanceRunner.h @@ -14,6 +14,9 @@ #include #include #include +#include +#include +#include #include #include diff --git a/implicit_functions/cross_validation/ParameterSet.cpp b/implicit_functions/cross_validation/ParameterSet.cpp index 9ece7db..0344c3e 100644 --- a/implicit_functions/cross_validation/ParameterSet.cpp +++ b/implicit_functions/cross_validation/ParameterSet.cpp @@ -4,11 +4,12 @@ #include "ParameterSet.h" -ParameterSet::ParameterSet(int popSize, double mutProb, SelectionType selType) +ParameterSet::ParameterSet(int popSize, double mutProb, SelectionType selType, vector depths) { this->populationSize = popSize; this->mutationProbablity = mutProb; this->selectionType = selType; + this->depths = depths; } string ParameterSet::getName() diff --git a/implicit_functions/cross_validation/ParameterSet.h b/implicit_functions/cross_validation/ParameterSet.h index 570d55f..3e1dae7 100644 --- a/implicit_functions/cross_validation/ParameterSet.h +++ b/implicit_functions/cross_validation/ParameterSet.h @@ -7,6 +7,7 @@ #include +#include #include "CrossValUtil.h" using namespace std; @@ -15,8 +16,9 @@ class ParameterSet { int populationSize; double mutationProbablity; SelectionType selectionType; + vector depths; string getName(); - ParameterSet(int popSize, double mutProb, SelectionType selType); + ParameterSet(int popSize, double mutProb, SelectionType selType, vector depths); }; diff --git 
a/implicit_functions/cross_validation/TweakableParameters.cpp b/implicit_functions/cross_validation/TweakableParameters.cpp index bb48939..8f19646 100644 --- a/implicit_functions/cross_validation/TweakableParameters.cpp +++ b/implicit_functions/cross_validation/TweakableParameters.cpp @@ -1,5 +1,5 @@ // -// Created by josipmrden on 23. 04. 2020.. +// Created by josipmrden on 23. 04. 2020. // #include "TweakableParameters.h" @@ -13,10 +13,11 @@ TweakableParameters::TweakableParameters() currentMutIndex = 0; currentPopIndex = 0; populationSizes = {50, 100, 200, 500, 1000}; - mutationProbablities = {0.1, 0.2, 0.3, 0.5, 0.7, 0.9}; + mutationProbablities = {0.9}; + depths = {4, 5, 6, 7, 8, 9, 10}; selectionTypes = {Tournament3, RouletteWheel}; } -TweakableParameters::TweakableParameters(vector popSizes, vector mutProbs, vector selectTypes) +TweakableParameters::TweakableParameters(vector popSizes, vector mutProbs, vector selectTypes, vector depths) { fixedSelType = false; fixedPopSize = false; @@ -27,6 +28,7 @@ TweakableParameters::TweakableParameters(vector popSizes, vector mu this->populationSizes = popSizes; this->mutationProbablities = mutProbs; this->selectionTypes = selectTypes; + this->depths = depths; } bool TweakableParameters::hasNext(TweakableType type) @@ -94,7 +96,7 @@ ParameterSet TweakableParameters::getNext(TweakableType type) break; } - ParameterSet ps(popParam, mutProb, st); + ParameterSet ps(popParam, mutProb, st, this->depths); return ps; } diff --git a/implicit_functions/cross_validation/TweakableParameters.h b/implicit_functions/cross_validation/TweakableParameters.h index 9b242a1..7273f86 100644 --- a/implicit_functions/cross_validation/TweakableParameters.h +++ b/implicit_functions/cross_validation/TweakableParameters.h @@ -20,11 +20,12 @@ class TweakableParameters { int currentSelIndex; public: vector populationSizes; + vector depths; vector mutationProbablities; vector selectionTypes; TweakableParameters(); - TweakableParameters(vector 
popSizes, vector mutProbs, vector selectTypes); + TweakableParameters(vector popSizes, vector mutProbs, vector selectTypes, vector depths); void fixPopSize(int popSize); void fixMutProb(int mutProb); void fixSelType(SelectionType selType);
diff --git a/implicit_functions/unordered/GepEvaluation.cpp b/implicit_functions/unordered/GepEvaluation.cpp
new file mode 100644
index 0000000..6a85536
--- /dev/null
+++ b/implicit_functions/unordered/GepEvaluation.cpp
@@ -0,0 +1,239 @@
+#include "GepEvaluation.h"
+
+using namespace std;
+
+// Default constructor, used by createNew(): a single tree per individual, planes required.
+GepEvaluation::GepEvaluation() : AbstractEvaluateOp()
+{
+    this->noTrees = 1;
+    this->requiresPlanes = true;
+};
+// Same configuration as the default constructor; forwards datasetFileName and
+// paretoFrontier to AbstractEvaluateOp and stores the given state.
+GepEvaluation::GepEvaluation(string datasetFileName, ParetoFrontier* paretoFrontier, StateP state)
+        : AbstractEvaluateOp(datasetFileName, paretoFrontier)
+{
+    this->noTrees = 1;
+    _state = state;
+    _initializedVariables = false;
+    this->requiresPlanes = true;
+}
+
+// Returns a new default-constructed evaluator allocated with new.
+AbstractEvaluateOp *GepEvaluation::createNew() {
+    return new GepEvaluation();
+}
+
+// Called only once, before the evolution: loads the training data from _datasetFileName.
+// The file starts with the sample count and the variable count; each sample then takes
+// two lines: the space-separated point coordinates, followed by the space-separated
+// coefficients handed to MultiDimEllipse.
+// Returns false when the file cannot be opened, the two counts cannot be read or no
+// sample was loaded; true otherwise.
+bool GepEvaluation::initialize(StateP state)
+{
+    _state = state;
+    _initializedVariables = false;
+
+    std::ifstream inputFileStream(_datasetFileName);
+    if (!inputFileStream)
+    {
+        return false;
+    }
+
+    int sampleSize;
+    int varCount;
+
+    if (!(inputFileStream >> sampleSize >> varCount))
+    {
+        return false;
+    }
+    inputFileStream.ignore(1, '\n');
+
+    for(int i = 0; i < sampleSize; i += 1) {
+        string pointLine;
+        getline(inputFileStream, pointLine);
+        istringstream lineStream(pointLine);
+
+        vector sampleVariables;
+        for (int j = 0; j < varCount; j++)
+        {
+            string variableString;
+            getline(lineStream, variableString, ' ');
+            double variableValue = stod(variableString);
+
+            sampleVariables.push_back(variableValue);
+        }
+
+        string coneCoeffsLine;
+        getline(inputFileStream, coneCoeffsLine);
+        istringstream coneCoeffsStream(coneCoeffsLine);
+
+        vector coeffs;
+        int coeffsSize = MultiDimEllipse::getNumberOfCoefficients(varCount);
+        string coeffStr;
+
+        for (int j = 0; j < coeffsSize; j++)
+        {
+            getline(coneCoeffsStream, coeffStr, ' ');
+            double value = stod(coeffStr);
+            coeffs.push_back(value);
+        }
+
+        Point p;
+        p.coordinates = sampleVariables;
+        MultiDimEllipse c(p, coeffs);
+        c.initializeDerivations();
+
+        _multiEllipses.push_back(c);
+        _points.push_back(p);
+    }
+
+    // evaluate() indexes _points[0], so an empty data set is reported as a failure
+    return !_points.empty();
+}
+
+// Scores one individual (a FitnessMin value). The chromosome is assembled, then for
+// every data point the partial derivatives of the expression are estimated by forward
+// differences (step epsylon) and each pair of them is compared, through
+// getFitnessFromDerivation, with the cached derivation of that point's MultiDimEllipse.
+// Individuals that miss a variable or look constant on random inputs receive the
+// punishment value sampleSize^3. The running sum is returned as-is as soon as it exceeds
+// the punishment; otherwise the sum divided by the number of points is returned and
+// reported to the Pareto frontier.
+FitnessP GepEvaluation::evaluate(IndividualP individual)
+{
+    //"- * 4 4 + * Y Y * X X" -> circle
+    //"- * 5 5 + + * Z Z * Y Y * X X" -> sphere
+    //"- - + X * * X X X * Y Y 1.5" -> hyperbola
+    //"+ / * - X 1 - X 1 * 3 3 - / * - Y 2 - Y 2 * 4 4 1" -> ellipse
+    //"+ - Z * 0.1 Y * 3 X" -> harmonic oscillator
+    //"- + / * - X 3 - X 3 * 2.5 2.5 / * - Y 4.5 - Y 4.5 * 5 5 1" -> nonlinear harmonic oscillator
+
+    GEPChromosome::GEPChromosome* genotype = (boost::static_pointer_cast (individual->getGenotype())).get();
+    genotype->assemble();
+    Tree::Tree* tree = getTree(individual, "");
+
+    if (!_initializedVariables)
+    {
+        _variables = getAlgorithmVariables(tree, _state);
+        _initializedVariables = true;
+    }
+
+    FitnessP fitness (new FitnessMin);
+
+    double sampleSize = _points.size();
+    int dimensionSize = _points[0].coordinates.size();
+    double punishment = sampleSize * sampleSize * sampleSize;
+
+    if (!containsAllVariables(tree, _variables) || isLowStdevOnRandomValuesGep(genotype, _state, _variables))
+    {
+        fitness->setValue(punishment);
+        return fitness;
+    }
+
+    double totalFitness = 0.0;
+
+    for(int i = 0; i < sampleSize; i++) {
+        Point point = _points[i];
+
+        //calculate non moved value
+        double notMovedEvaluation = executeTreeGep(genotype, _variables, point);
+
+        //derivation for each variable
+        vector derivationByVariables;
+        for (int j = 0; j < _variables.size(); j++) {
+            double movedEvaluation = executeTreeForMovedPointFastGep(genotype, _variables, point, j, epsylon);
+            double derivativeByOneVariable = (movedEvaluation - notMovedEvaluation) / epsylon;
+            derivationByVariables.push_back(derivativeByOneVariable);
+        }
+
+        for (int j = 0; j < dimensionSize - 1; j++)
+        {
+            for (int k = j+1; k < dimensionSize; k++)
+            {
+                double dataDerivative = _multiEllipses[i].getDerivationCached(j, k);
+                double dv1 = derivationByVariables[j];
+                double dv2 = derivationByVariables[k];
+
+                double calculatedPointFitness = getFitnessFromDerivation(dataDerivative, dv1, dv2, punishment);
+                totalFitness += calculatedPointFitness;
+
+                if (totalFitness > punishment)
+                {
+                    fitness->setValue(totalFitness);
+                    return fitness;
+                }
+            }
+        }
+    }
+
+    totalFitness /= _points.size();
+    fitness->setValue(totalFitness);
+
+    _paretoFrontier->updateParetoFront(tree, totalFitness);
+
+    return fitness;
+}
+
+// Heuristic constant detector: executes the chromosome 10 times with every variable set
+// to a fresh getRandomDouble() value and returns true when getStdev of the results is
+// below 10E-6 (i.e. 1e-5).
+bool GepEvaluation::isLowStdevOnRandomValuesGep(GEPChromosome::GEPChromosome* individual, StateP state, vector variables)
+{
+    int variablesSize = variables.size();
+    vector results;
+
+    for (int i = 0; i < 10; i++)
+    {
+        double result;
+        for (int j = 0; j < variablesSize; j++)
+        {
+            double randomValue = state->getRandomizer()->getRandomDouble();
+            individual->setTerminalValue(variables[j], &randomValue);
+        }
+
+        individual->execute(&result);
+        results.push_back(result);
+    }
+
+    double stdev = getStdev(results);
+
+    return stdev < 10E-6;
+}
+
+// Binds variables[j] to point.coordinates[j] for every variable, executes the
+// chromosome and returns the result.
+double GepEvaluation::executeTreeGep(GEPChromosome::GEPChromosome* individual, vector variables, Point point)
+{
+    for (uint j = 0; j < variables.size(); j++) {
+        double value = point.coordinates[j];
+        individual->setTerminalValue(variables[j], &value);
+    }
+
+    double evaluation;
+    individual->execute(&evaluation);
+
+    return evaluation;
+}
+
+// Executes the chromosome with coordinate `index` shifted by epsylon. Only two terminals
+// are rebound: coordinate index - 1 is put back to its unshifted value and coordinate
+// index is shifted. This relies on the caller having bound the whole unshifted point
+// first and then calling with index = 0, 1, 2, ... in order, as evaluate() does.
+double GepEvaluation::executeTreeForMovedPointFastGep(GEPChromosome::GEPChromosome* individual, vector variables, Point point, int index, double epsylon)
+{
+    if (index != 0)
+    {
+        double value = point.coordinates[index - 1];
+        individual->setTerminalValue(variables[index - 1], &value);
+    }
+
+    double epsylonedValue = point.coordinates[index] + epsylon;
+    individual->setTerminalValue(variables[index], &epsylonedValue);
+
+    double evaluation;
+    individual->execute(&evaluation);
+
+    return evaluation;
+}
\ No newline at end of file
diff --git a/implicit_functions/unordered/GepEvaluation.h b/implicit_functions/unordered/GepEvaluation.h
new file mode 100644
index 0000000..890c2b4
--- /dev/null
+++ b/implicit_functions/unordered/GepEvaluation.h
@@ -0,0 +1,41 @@
+#ifndef SYMBREGEXERCISE_GEPEVAL_H
+#define SYMBREGEXERCISE_GEPEVAL_H
+
+#include
+#include
+#include
+#include "../ParetoFrontier.h"
+#include
+#include
+#include
+#include
+
+using namespace std;
+
+class GepEvaluation : public AbstractEvaluateOp
+{
+private:
+    StateP _state;
+    vector _multiEllipses; // one entry per data point, filled by initialize()
+    vector _points; // the data points, filled by initialize()
+    vector _variables; // obtained from getAlgorithmVariables() on the first evaluate() call
+    bool _initializedVariables;
+    double epsylon = 10E-5; // forward-difference step used by evaluate() (10E-5 is 1e-4)
+public:
+    AbstractEvaluateOp* createNew() override;
+    bool initialize(StateP state) override;
+    FitnessP evaluate(IndividualP individual) override;
+    GepEvaluation();
+    GepEvaluation(string datasetFileName, ParetoFrontier* paretoFrontier, StateP state);
+
+    bool isLowStdevOnRandomValuesGep(GEPChromosome::GEPChromosome* individual, StateP state, vector variables);
+    double executeTreeGep(GEPChromosome::GEPChromosome* individual, vector variables, Point point);
+    double executeTreeForMovedPointFastGep(GEPChromosome::GEPChromosome* individual, vector variables, Point point, int index, double epsylon);
+
+    string getName() override
+    {
+        return "GEP";
+    }
+};
+
+#endif
diff --git a/implicit_functions/unordered/UnorderedMultiDimImplicitEvaluation.cpp b/implicit_functions/unordered/UnorderedMultiDimImplicitEvaluation.cpp index d6487cd..ef7d49c 100644 --- a/implicit_functions/unordered/UnorderedMultiDimImplicitEvaluation.cpp +++ b/implicit_functions/unordered/UnorderedMultiDimImplicitEvaluation.cpp @@ -83,7 +83,7 @@ FitnessP UnorderedMultiDimImplicitEvaluation::evaluate(IndividualP individual) //"- - + X * * X X X * Y Y 1.5" -> hyperbola //"+ / * - X 1 - X 1 * 3 3 - / * - Y 2 - Y 2 * 4 4 1" -> ellipse //"+ - Z * 0.1 Y * 3 X" -> harmonic oscillator - //"+ - Z * 0.1 Y
* 9.8 sin X" -> nonlinear harmonic oscillator + //"- + / * - X 3 - X 3 * 2.5 2.5 / * - Y 4.5 - Y 4.5 * 5 5 1" -> nonlinear harmonic oscillator Tree::Tree* tree = getTree(individual, ""); if (!_initializedVariables)