ECF 1.5
GPSymbRegEvalOp.cpp
1#include "GPSymbRegEvalOp.h"
2#include "ReadData.h"
3#include <vector>
4#include <boost/math/special_functions/fpclassify.hpp> // isnan, isinf
5
6#include "MeanSquareErrorMetric.h"
7#include "MeanAbsoluteErrorMetric.h"
8#include "MeanAbsolutePercentageErrorMetric.h"
9
10#include "IATree.h"
11#include "IATerminal.h"
12
13
15{
// Body of the parameter-registration routine (its signature line is not part
// of this listing). Registers the four string-typed registry entries that the
// initialization code further down reads back: "input_file",
// "linear_scaling", "error_weights.file" and "error_metric".
// Each entry is given a newly allocated, empty std::string (cast to voidP)
// as its value and is declared with type ECF::STRING.
16 state->getRegistry()->registerEntry("input_file", (voidP)(new std::string), ECF::STRING);
17 state->getRegistry()->registerEntry("linear_scaling", (voidP) (new std::string), ECF::STRING);
18 state->getRegistry()->registerEntry("error_weights.file", (voidP) (new std::string), ECF::STRING);
19 state->getRegistry()->registerEntry("error_metric", (voidP) (new std::string), ECF::STRING);
20}
21
22
23// called only once, before the evolution - reads the training data and the evaluation settings
25{
// Body of the evaluator's initialization (its signature line is not part of
// this listing). In order, it:
//   1. reads the "linear_scaling" switch,
//   2. loads the sample table named by "input_file" into eval.data,
//   3. optionally loads per-sample error weights from "error_weights.file",
//   4. selects the error metric named by "error_metric".
// Returns false when a data file cannot be read, when fewer weights than
// samples were supplied, or when the metric name is unknown; true otherwise.

// Linear scaling is enabled only when the entry is exactly the string "true".
26 std::stringstream ss;
27 voidP sptr = state->getRegistry()->getEntry("linear_scaling");
28 ss << *((std::string*) sptr.get());
29 std::string ls = ss.str();
30 if (ls.compare("true") == 0) {
31 linearScaling = true;
32 } else {
33 linearScaling = false;
34 }
// Note: clear() resets the stream's state flags only; it does not empty the
// buffer. The stream is not used again below.
35 ss.clear();
36
37
38 x.clear();
39 y.clear();
40 f.clear();
41
42 // initialize the Evaluator
43 eval.initialize();
44
45 // check if the parameters are stated (used) in the conf. file
46 if (state->getRegistry()->isModified("input_file")) {
47 std::string dataFile = *((std::string*) state->getRegistry()->getEntry("input_file").get());
48
49 // read from file into Evaluator data
50 if (!readDataFromFile(eval.data, dataFile)) {
51 return false;
52 }
53
// One row per sample. The last column of a row is read as the expected
// output by the evaluation functions, hence "size() - 1" input variables.
// NOTE(review): eval.data[0] is accessed without checking that at least one
// row was read - confirm readDataFromFile fails on an empty file.
54 nSamples = eval.data.size();
55 nVariables = eval.data[0].size() - 1;
56
57 varNames.clear();
58
// Input columns are bound to terminals named "x1", "x2", ... (1-based).
59 for(uint i = 0; i < nVariables; i++) {
60 varNames.push_back ("x" + uint2str(i + 1));
61 }
62
63 GenotypeP genotype = state->getGenotypes()[0];
64 Tree::IATreeP iaTree = boost::dynamic_pointer_cast<Tree::IATree>(genotype);
65
// Only when the first configured genotype is an IATree: set each variable
// terminal's lowerBound/upperBound to the minimum/maximum of its column
// over all samples.
66 if (iaTree != 0) {
67 for(uint i = 0; i < nVariables; i++) {
68 IATerminalP terminal = boost::dynamic_pointer_cast<IATerminal>(iaTree->primitiveSet_->getTerminalByName("x" + uint2str(i + 1)));
69 assert(terminal != 0);
70 double low = eval.data[0][i];
71 double up = low;
72 for (uint j = 1; j < nSamples; j++) {
73 double d = eval.data[j][i];
74 if (d > up) {
75 up = d;
76 }
77 if (d < low) {
78 low = d;
79 }
80 }
81
82 terminal->lowerBound = low;
83 terminal->upperBound = up;
84 }
85 }
86 }
// NOTE(review): when "input_file" is not set, nSamples, nVariables and
// varNames are not assigned in this body - confirm they are initialized
// elsewhere.
87
88 errorWeightsDefined = false;
89
90 if (state->getRegistry()->isModified("error_weights.file")) {
91 std::string dataFile = *((std::string*) state->getRegistry()->getEntry("error_weights.file").get());
92
93 std::vector< std::vector<double> > error_weights;
94
95 // read from file into Evaluator data
96 if (!readDataFromFile(error_weights, dataFile)) {
97 return false;
98 }
99
100 errorWeightsDefined = true;
101
// Only the first column of every row is used as that sample's weight.
// NOTE(review): errorWeights is appended to without being cleared in this
// body, and error_weights[i][0] assumes every row has at least one value.
102 unsigned int size = error_weights.size();
103 for (uint i = 0; i < size; i++) {
104 errorWeights.push_back(error_weights[i][0]);
105 }
106
// The evaluation functions index errorWeights by sample, so fewer weights
// than samples is rejected.
107 if (size < nSamples) {
108 std::cout << "Error weights count less than samples count." << std::endl;
109 errorWeightsDefined = false;
110 return false;
111 }
112 }
113
// Select the error metric by name; mean square error is the default.
// NOTE(review): the metric is created with a raw new; how errorMetric is
// owned and released is not visible here.
114 if (state->getRegistry()->isModified("error_metric")) {
115 std::string errorMetricStr = *((std::string*) state->getRegistry()->getEntry("error_metric").get());
116
117 if (errorMetricStr.compare("mean_square_error") == 0) {
118 errorMetric = new MeanSquareErrorMetric;
119 } else if (errorMetricStr.compare("mean_absolute_error") == 0) {
120 errorMetric = new MeanAbsoluteErrorMetric;
121 } else if (errorMetricStr.compare("mean_absolute_percentage_error") == 0) {
122 errorMetric = new MeanAbsolutePercentageErrorMetric;
123 } else {
124 std::cout << "Error metric unknown." << std::endl;
125 return false;
126 }
127 } else {
128 // default error metric
129 errorMetric = new MeanSquareErrorMetric;
130 }
131
132 return true;
133}
134
135
136bool GPSymbRegEvalOp::csvRead(StateP state, std::string entry, std::vector<double>* vec)
137{
138 std::ifstream stream;
139 std::string line;
140 voidP sptr = state->getRegistry()->getEntry(entry);
141 std::string fname = *((std::string*) sptr.get());
142 stream.open(fname.c_str());
143 if (!stream.is_open()) return false;
144 while (getline(stream, line)){
145 vec->push_back(atof(line.c_str()));
146 }
147 return true;
148}
149
150
151FitnessP GPSymbRegEvalOp::evaluateUsingLinearScaling(IndividualP individual)
152{
153 // we try to minimize the function value, so we use FitnessMin fitness (for minimization problems)
154 FitnessP fitness (new FitnessMin);
155
156 // get the genotype we defined in the configuration file
157 Tree::Tree* tree = (Tree::Tree*) individual->getGenotype().get();
158 // (you can also use boost smart pointers:)
159 //TreeP tree = boost::static_pointer_cast<Tree::Tree> (individual->getGenotype());
160
161 //average value of y and t
162 double mean_y = 0, mean_t = 0, sum_yt = 0, sum_sqr_y = 0;
163
164 for (uint i = 0 ; i < nSamples ; i++)
165 {
166 // set only defined variables (x1, x2, ...)
167 for(uint term = 0; term < nVariables; term++) {
168 tree->setTerminalValue(varNames[term], &eval.data[i][term]);
169 }
170 // get the y value of the current tree
171 double y;
172 tree->execute(&y);
173
174 mean_y += y;
175 double t = eval.data[i][nVariables];
176 mean_t += t;
177 sum_yt += t*y;
178 sum_sqr_y += y*y;
179 }
180
181 mean_y /= nSamples;
182 mean_t /= nSamples;
183
184 //coefficients for linear regression
185 double b = (sum_yt - nSamples*mean_y*mean_t)/(sum_sqr_y - nSamples*mean_y*mean_y);
186 double a = mean_t - b*mean_y;
187
188 bool isNaN = boost::math::isnan(a) || boost::math::isnan(b);
189 bool isInf = boost::math::isinf(a) || boost::math::isinf(b);
190 if (isNaN || isInf) {
191 fitness->setValue(1e14);
192 return fitness;
193 }
194
195 double value = 0;
196 double var_of_y = 0; //variance of variable result
197
198 for(uint i = 0; i < nSamples; i++) {
199 // set only defined variables (x1, x2, ...)
200 for(uint term = 0; term < nVariables; term++) {
201 tree->setTerminalValue(varNames[term], &eval.data[i][term]);
202 dataWriter->writeData(eval.data[i][term]);
203 dataWriter->writeData('\t');
204 }
205
206 double y;
207 tree->execute(&y);
208
209 var_of_y += (y-mean_y)*(y-mean_y);
210 // add the difference
211 double t = eval.data[i][nVariables];
212 double result = a + b*y;
213 dataWriter->writeData(result);
214 dataWriter->writeData('\n');
215
216 double error = errorMetric->calculateError(result, t);
217
218 if (errorWeightsDefined) {
219 error *= errorWeights[i];
220 }
221
222 value += error*error;
223 }
224
225 value /= nSamples;
226 var_of_y /= nSamples-1;
227
228 //delete expresion (but not explicit, just set fitness to big number)
229 // if ( fabs(var_of_y) > 1e7 || fabs(var_of_y) < 1e-7 ) {
230 // value = 1e14;
231 // }
232
233 if (printParams) {
234 std::cout << "\nLinear scaling parameters: scale=" << b << " offset=" << a << std::endl;
235 }
236
237 fitness->setValue(value);
238
239 return fitness;
240}
241
242
243FitnessP GPSymbRegEvalOp::evaluateWithoutLinearScaling(IndividualP individual)
244{
245 // we try to minimize the function value, so we use FitnessMin fitness (for minimization problems)
246 FitnessP fitness(new FitnessMin);
247
248 // get the genotype we defined in the configuration file
249 Tree::Tree* tree = (Tree::Tree*) individual->getGenotype().get();
250
251 // if using the Evaluator
252 //readIndividual(individual);
253
254 double value = 0;
255 double result;
256
257 // evaluating data from input file
258 for (uint i = 0; i < nSamples; i++) {
259 // set only defined variables (x1, x2, ...)
260 for(uint term = 0; term < nVariables; term++) {
261 tree->setTerminalValue(varNames[term], &eval.data[i][term]);
262 dataWriter->writeData(eval.data[i][term]);
263 dataWriter->writeData('\t');
264 }
265
266 // get the f value of the current tree
267 tree->execute(&result);
268 dataWriter->writeData(result);
269 dataWriter->writeData('\n');
270 // Evaluator:
271 //result = eval.executeParsedExpression(i);
272
273 // add the absolute difference
274 //value += fabs(eval.data[i][nVariables] - result);
275 // or squared error
276 double error = errorMetric->calculateError(result, eval.data[i][nVariables]);
277
278 if (errorWeightsDefined) {
279 error *= errorWeights[i];
280 }
281
282 value += error;
283 // += fabs(f.at(i*nSamples + j) - result);
284 }
285
286 value /= pow(1.*nSamples, 2);
287 fitness->setValue(value);
288 return fitness;
289}
290
291
292FitnessP GPSymbRegEvalOp::evaluate(IndividualP individual)
293{
294 if (linearScaling) {
295 return evaluateUsingLinearScaling(individual);
296 } else {
297 return evaluateWithoutLinearScaling(individual);
298 }
299}
300
301
302// read an individual into Evaluator parsedExpression - Tree version
303void GPSymbRegEvalOp::readIndividual(IndividualP individual)
304{
305 TreeP tree = boost::dynamic_pointer_cast<Tree::Tree> (individual->getGenotype());
306 static std::string strPrimitive;
307 uint nTreeSize;
308 uint nTrees = (uint) individual->size();
309 eval.parsedExpression.resize(nTrees);
310
311 // parse multiple trees (if any)
312 for(uint iTree = 0; iTree < nTrees; iTree++) {
313 TreeP pTree = boost::dynamic_pointer_cast<Tree::Tree> (individual->getGenotype(iTree)); // pokazivac na tree
314 nTreeSize = (uint) pTree->size();
315 eval.parsedExpression[iTree].resize(nTreeSize);
316
317 // read whole tree expression
318 for(uint i = 0; i < nTreeSize; i++) {
319 strPrimitive = (*pTree)[i]->primitive_->getName();
320
321 // check function names
322 for(uint iPrim = 0; iPrim < eval.funcNames.size(); iPrim++)
323 { // TODO: skip unused functions
324 //if(!Nodes[iPrim].active)
325 // continue;
326 if(strPrimitive == eval.funcNames[iPrim]) {
327 eval.parsedExpression[iTree][i] = iPrim;
328 break;
329 }
330 }
331
332 // check terminals
333 for(uint iPrim = 0; iPrim < eval.termNames.size(); iPrim++)
334 { //if(!Nodes[iPrim].active)
335 // continue;
336 if(strPrimitive == eval.termNames[iPrim]) {
337 eval.parsedExpression[iTree][i] = iPrim + Evaluator::TERMINALS;
338 break;
339 }
340 }
341 }
342 }
343}
344
345
346
Fitness for minimization problems.
Definition: FitnessMin.h:12
bool initialize(StateP)
Initialize the evaluator. Called before first evaluation occurs.
void registerParameters(StateP)
Register evaluator parameters. Called before EvaluateOp::initialize method.
FitnessP evaluate(IndividualP individual)
Evaluate a single individual. Method must create and return a Fitness object.
Tree class - implements genotype as a tree.
Definition: Tree_c.h:29
void setTerminalValue(std::string, void *)
Set a terminal's value.
Definition: Tree.cpp:504
void execute(void *)
Execute current tree.
Definition: Tree.cpp:362