MLDemos/mltrain.cpp
1 /*********************************************************************
2 MLDemos: A User-Friendly visualization toolkit for machine learning
3 Copyright (C) 2010  Basilio Noris
4 Contact: mldemos@b4silio.com
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free
18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *********************************************************************/
20 #include "mldemos.h"
21 #include "basicMath.h"
22 #include "classifier.h"
23 #include "regressor.h"
24 #include "dynamical.h"
25 #include "clusterer.h"
26 #include "maximize.h"
27 #include "roc.h"
28 #include <QDebug>
29 #include <fstream>
30 #include <QPixmap>
31 #include <QBitmap>
32 #include <QSettings>
33 #include <QFileDialog>
34 #include <QProgressDialog>
35
36 using namespace std;
37
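// Trains a classifier on the samples currently on the canvas, restricted to the selected input dimensions.
// For binary-only classifiers the labels are remapped to +1/-1 using 'positive' as the positive class.
// If trainRatio < 1 or a manual trainList is given, the data is split into training and test sets,
// ROC data is stored in classifier->rocdata and per-class statistics are appended to lastTrainingInfo.
// Returns false if the classifier is missing or the (binary) training data contains a single class.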
38 bool MLDemos::Train(Classifier *classifier, int positive, float trainRatio, bvec trainList)
39 {
40     if(!classifier) return false;
41     ivec labels = canvas->data->GetLabels();
42     ivec newLabels;
43     newLabels.resize(labels.size(), 1);
44     bool bMulticlass = classifier->IsMultiClass();
45     //std::map<int,int> classMap, inverseMap;
46     if(!bMulticlass)
47     {
48         if(positive == 0)
49         {
50             FOR(i, labels.size()) newLabels[i] = (!labels[i] || labels[i] == -1) ? 1 : -1;
51         }
52         else
53         {
54             FOR(i, labels.size()) newLabels[i] = (labels[i] == positive) ? 1 : -1;
55         }
56         bool bHasPositive = false, bHasNegative = false;
57         FOR(i, newLabels.size())
58         {
59             if(bHasPositive && bHasNegative) break;
60             bHasPositive |= newLabels[i] == 1;
61             bHasNegative |= newLabels[i] == -1;
62         }
63         if((!bHasPositive || !bHasNegative) && !classifier->SingleClass()) return false;
64     }
65     else
66     {
67         newLabels = labels;
68         /*
69         int cnt=0;
70         FOR(i, labels.size()) if(!classMap.count(labels[i])) classMap[labels[i]] = cnt++;
71         for(map<int,int>::iterator it=classMap.begin(); it != classMap.end(); it++) inverseMap[it->second] = it->first;
72         newLabels.resize(labels.size());
73         FOR(i, labels.size()) newLabels[i] = classMap[labels[i]];
74         */
75     }
76
77     classifier->rocdata.clear();
78     classifier->roclabels.clear();
79
80     lastTrainingInfo = "";
81     map<int, int> truePerClass;
82     map<int, int> falsePerClass;
83     map<int, int> countPerClass;
84
85     ivec inputDims = GetInputDimensions();
86     vector<fvec> samples = canvas->data->GetSampleDims(inputDims);
87     //vector<fvec> samples = canvas->data->GetSamples();
88     if(trainRatio == 1 && !trainList.size())
89     {
90         bool bTrueMulti = bMulticlass;
91         classifier->Train(samples, newLabels);
92         // generate the ROC data on the training samples
93         vector<f32pair> rocData;
94         FOR(i, samples.size())
95         {
96             if(bMulticlass)
97             {
98                 fvec res = classifier->TestMulti(samples[i]);
99                 if(res.size() == 1)
100                 {
101                     rocData.push_back(f32pair(res[0], newLabels[i]));
102                     float resp = res[0];
103                     if(resp > 0 && newLabels[i] == 1) truePerClass[1]++;
104                     else if(resp > 0 && newLabels[i] != 1) falsePerClass[0]++;
105                     else if(newLabels[i] == 1) falsePerClass[1]++;
106                     else truePerClass[0]++;
107                     bTrueMulti = false;
108                 }
109                 else
110                 {
111                     int maxClass = 0;
112                     for(int j=1; j<res.size(); j++) if(res[maxClass] < res[j]) maxClass = j;
113                     int c = classifier->inverseMap[maxClass];
114                     rocData.push_back(f32pair(c, newLabels[i]));
115                     if(newLabels[i] != c) falsePerClass[c]++;
116                     else truePerClass[c]++;
117                }
118             }
119             else
120             {
121                 float resp = classifier->Test(samples[i]);
122                 rocData.push_back(f32pair(resp, newLabels[i]));
123                 if(resp > 0 && newLabels[i] == 1) truePerClass[1]++;
124                 else if(resp > 0 && newLabels[i] != 1) falsePerClass[0]++;
125                 else if(newLabels[i] == 1) falsePerClass[1]++;
126                 else truePerClass[0]++;
127             }
128         }
129         FOR(i, labels.size())
130         {
131             if(bMulticlass) countPerClass[labels[i]]++;
132             else countPerClass[newLabels[i] == 1]++;
133         }
134         classifier->rocdata.push_back(rocData);
135         classifier->roclabels.push_back("training");
136         lastTrainingInfo += QString("\nTraining Set (%1 samples):\n").arg(samples.size());
137         if(bTrueMulti)
138         {
139             for(map<int,int>::iterator it = countPerClass.begin(); it != countPerClass.end(); it++)
140             {
141                 int c = it->first;
142                 int tp = truePerClass[c];
143                 int fp = falsePerClass[c];
144                 float ratio = (it->second != 0) ? tp / (float)it->second : 0;
145                 lastTrainingInfo += QString("Class %1 (%5 samples): %2 correct (%4%)\n%3 incorrect\n").arg(c).arg(tp).arg(fp).arg((int)(ratio*100)).arg(it->second);
146             }
147         }
148         else
149         {
150             int posClass = 1;
151             if(truePerClass[1] / (float) countPerClass[1] < 0.25)
152             {
153                 posClass = 0;
154             }
155             int tp = posClass ? truePerClass[1] : falsePerClass[1];
156             int fp = posClass ? falsePerClass[1] : truePerClass[1];
157             int count = countPerClass[1];
158             float ratio = count != 0 ? tp/(float)count : 1;
159             lastTrainingInfo += QString("Positive (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
160             tp = posClass ? truePerClass[0] : falsePerClass[0];
161             fp = posClass ? falsePerClass[0] : truePerClass[0];
162             count = countPerClass[0];
163             ratio = count != 0 ? tp/(float)count : 1;
164             lastTrainingInfo += QString("Negative (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
165         }
166     }
167     else
168     {
169         vector<fvec> trainSamples, testSamples;
170         ivec trainLabels, testLabels;
171         u32 *perm = 0;
172         int trainCnt, testCnt;
173         if(trainList.size())
174         {
175             FOR(i, trainList.size())
176             {
177                 if(trainList[i])
178                 {
179                     trainSamples.push_back(samples[i]);
180                     trainLabels.push_back(newLabels[i]);
181                 }
182                 else
183                 {
184                     testSamples.push_back(samples[i]);
185                     testLabels.push_back(newLabels[i]);
186                 }
187             }
188             trainCnt = trainSamples.size();
189             testCnt = testSamples.size();
190         }
191         else
192         {
193             map<int,int> classCnt, trainClassCnt, testClassCnt;
194             FOR(i, labels.size())
195             {
196                 classCnt[labels[i]]++;
197             }
198
199             trainCnt = (int)(samples.size()*trainRatio);
200             testCnt = samples.size() - trainCnt;
201             trainSamples.resize(trainCnt);
202             trainLabels.resize(trainCnt);
203             testSamples.resize(testCnt);
204             testLabels.resize(testCnt);
205             perm = randPerm(samples.size());
206             FOR(i, trainCnt)
207             {
208                 trainSamples[i] = samples[perm[i]];
209                 trainLabels[i] = newLabels[perm[i]];
210                 trainClassCnt[trainLabels[i]]++;
211             }
212             for(int i=trainCnt; i<samples.size(); i++)
213             {
214                 testSamples[i-trainCnt] = samples[perm[i]];
215                 testLabels[i-trainCnt] = newLabels[perm[i]];
216                 testClassCnt[testLabels[i-trainCnt]]++;
217             }
218             // we need to make sure that we have at least one sample per class
219             for(map<int,int>::iterator it=classCnt.begin();it!=classCnt.end();it++)
220             {
221                 if(!trainClassCnt.count(it->first))
222                 {
223                     FOR(i, testSamples.size())
224                     {
225                         if(testLabels[i] != it->first) continue;
226                         trainSamples.push_back(testSamples[i]);
227                         trainLabels.push_back(testLabels[i]);
228                         testSamples.erase(testSamples.begin() + i);
229                         testLabels.erase(testLabels.begin() + i);
230                         trainCnt++;
231                         testCnt--;
232                         break;
233                     }
234                 }
235             }
236         }
237         classifier->Train(trainSamples, trainLabels);
238
239         // generate the ROC data on the training set
240         bool bTrueMulti = bMulticlass;
241         vector<f32pair> rocData;
242         FOR(i, trainSamples.size())
243         {
244             if(bMulticlass)
245             {
246                 fvec res = classifier->TestMulti(trainSamples[i]);
247                 if(res.size() == 1)
248                 {
249                     rocData.push_back(f32pair(res[0], trainLabels[i]));
250                     float resp = res[0];
251                     if(resp > 0 && trainLabels[i] == 1) truePerClass[1]++;
252                     else if(resp > 0 && trainLabels[i] != 1) falsePerClass[0]++;
253                     else if(trainLabels[i] == 1) falsePerClass[1]++;
254                     else truePerClass[0]++;
255                     bTrueMulti = false;
256                 }
257                 else
258                 {
259                     int maxClass = 0;
260                     for(int j=1; j<res.size(); j++) if(res[maxClass] < res[j]) maxClass = j;
261                     rocData.push_back(f32pair(classifier->inverseMap[maxClass], trainLabels[i]));
262                     int c = classifier->inverseMap[maxClass];
263                     if(trainLabels[i] != c) falsePerClass[c]++;
264                     else truePerClass[c]++;
265
266                 }
267             }
268             else
269             {
270                 float resp = classifier->Test(trainSamples[i]);
271                 rocData.push_back(f32pair(resp, trainLabels[i]));
272                 if(resp > 0 && trainLabels[i] == 1) truePerClass[1]++;
273                 else if(resp > 0 && trainLabels[i] != 1) falsePerClass[0]++;
274                 else if(trainLabels[i] == 1) falsePerClass[1]++;
275                 else truePerClass[0]++;
276             }
277             if(bMulticlass) countPerClass[trainLabels[i]]++;
278             else countPerClass[trainLabels[i] == 1]++;
279         }
280         rocData = FixRocData(rocData);
281         classifier->rocdata.push_back(rocData);
282         classifier->roclabels.push_back("training");
283         lastTrainingInfo += QString("\nTraining Set (%1 samples):\n").arg(trainSamples.size());
284         int posClass = 1;
285         if(bTrueMulti)
286         {
287             for(map<int,int>::iterator it = countPerClass.begin(); it != countPerClass.end(); it++)
288             {
289                 int c = it->first;
290                 int tp = truePerClass[c];
291                 int fp = falsePerClass[c];
292                 float ratio = it->second != 0 ? tp / (float)it->second : 0;
293                 lastTrainingInfo += QString("Class %1 (%5 samples): %2 correct (%4%)\n%3 incorrect\n").arg(c).arg(tp).arg(fp).arg((int)(ratio*100)).arg(it->second);
294             }
295         }
296         else
297         {
298             if(truePerClass[1] / (float) countPerClass[1] < 0.25)
299             {
300                 posClass = 0;
301             }
302             int tp = posClass ? truePerClass[1] : falsePerClass[1];
303             int fp = posClass ? falsePerClass[1] : truePerClass[1];
304             int count = countPerClass[1];
305             float ratio = count != 0 ? tp/(float)count : 1;
306             lastTrainingInfo += QString("Positive (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
307             tp = posClass ? truePerClass[0] : falsePerClass[0];
308             fp = posClass ? falsePerClass[0] : truePerClass[0];
309             count = countPerClass[0];
310             ratio = count != 0 ? tp/(float)count : 1;
311             lastTrainingInfo += QString("Negative (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
312         }
313
314         truePerClass.clear();
315         falsePerClass.clear();
316         countPerClass.clear();
317         rocData.clear();
318         FOR(i, testSamples.size())
319         {
320             if(bMulticlass)
321             {
322                 fvec res = classifier->TestMulti(testSamples[i]);
323                 if(res.size() == 1)
324                 {
325                     rocData.push_back(f32pair(res[0], testLabels[i]));
326                     float resp = res[0];
327                     if(resp > 0 && testLabels[i] == 1) truePerClass[1]++;
328                     else if(resp > 0 && testLabels[i] != 1) falsePerClass[0]++;
329                     else if(testLabels[i] == 1) falsePerClass[1]++;
330                     else truePerClass[0]++;
331                     bTrueMulti = false;
332                 }
333                 else
334                 {
335                     int maxClass = 0;
336                     for(int j=1; j<res.size(); j++) if(res[maxClass] < res[j]) maxClass = j;
337                     rocData.push_back(f32pair(classifier->inverseMap[maxClass], testLabels[i]));
338                     int c = classifier->inverseMap[maxClass];
339                     if(testLabels[i] != c) falsePerClass[c]++;
340                     else truePerClass[c]++;
341                 }
342             }
343             else
344             {
345                 float resp = classifier->Test(testSamples[i]);
346                 rocData.push_back(f32pair(resp, testLabels[i]));
347                 if(resp > 0 && testLabels[i] == 1) truePerClass[1]++;
348                 else if(resp > 0 && testLabels[i] != 1) falsePerClass[0]++;
349                 else if(testLabels[i] == 1) falsePerClass[1]++;
350                 else truePerClass[0]++;
351             }
352             if(bMulticlass) countPerClass[testLabels[i]]++;
353             else countPerClass[testLabels[i] == 1]++;
354         }
355         rocData = FixRocData(rocData);
356         classifier->rocdata.push_back(rocData);
357         classifier->roclabels.push_back("test");
358         lastTrainingInfo += QString("\nTesting Set (%1 samples):\n").arg(testSamples.size());
359         if(bTrueMulti)
360         {
361             for(map<int,int>::iterator it = countPerClass.begin(); it != countPerClass.end(); it++)
362             {
363                 int c = it->first;
364                 int tp = truePerClass[c];
365                 int fp = falsePerClass[c];
366                 float ratio = it->second != 0 ? tp / (float)it->second : 0;
367                 lastTrainingInfo += QString("Class %1 (%5 samples): %2 correct (%4%)\n%3 incorrect\n").arg(c).arg(tp).arg(fp).arg((int)(ratio*100)).arg(it->second);
368             }
369         }
370         else
371         {
372             int tp = posClass ? truePerClass[1] : falsePerClass[1];
373             int fp = posClass ? falsePerClass[1] : truePerClass[1];
374             int count = countPerClass[1];
375             float ratio = count != 0 ? tp/(float)count : 1;
376             lastTrainingInfo += QString("Positive (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
377             tp = posClass ? truePerClass[0] : falsePerClass[0];
378             fp = posClass ? falsePerClass[0] : truePerClass[0];
379             count = countPerClass[0];
380             ratio = count != 0 ? tp/(float)count : 1;
381             lastTrainingInfo += QString("Negative (%4 samples): %1 correct (%3%)\n%2 incorrect\n").arg(tp).arg(fp).arg((int)(ratio*100)).arg(count);
382         }
383         KILL(perm);
384     }
385     bIsRocNew = true;
386     bIsCrossNew = true;
387     SetROCInfo();
388     return true;
389 }
390
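// Trains a regressor to predict the value of dimension outputDim from the remaining input dimensions.
// With trainRatio == 1 and no manual trainList the full dataset is used and only training errors are
// computed; otherwise a train/test split is made and the absolute prediction errors are stored in
// regressor->trainErrors and regressor->testErrors.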
391 void MLDemos::Train(Regressor *regressor, int outputDim, float trainRatio, bvec trainList)
392 {
393     if(!regressor || !canvas->data->GetCount()) return;
394     ivec inputDims = GetInputDimensions();
395     int outputIndexInList = -1;
396     if(inputDims.size()==1 && inputDims[0] == outputDim) return; // we don't have enough dimensions for training
397     FOR(i, inputDims.size()) if(outputDim == inputDims[i])
398     {
399         outputIndexInList = i;
400         break;
401     }
402
403     vector<fvec> samples = canvas->data->GetSampleDims(inputDims, outputIndexInList == -1 ? outputDim : -1);
404     ivec labels = canvas->data->GetLabels();
405     if(!samples.size()) return;
406     int cnt = samples.size();
407     int dim = samples[0].size();
408     if(dim < 2) return;
409
410     regressor->SetOutputDim(outputDim);
411
412     fvec trainErrors, testErrors;
413     if(trainRatio == 1.f && !trainList.size())
414     {
415         regressor->Train(samples, labels);
416         trainErrors.clear();
417         FOR(i, samples.size())
418         {
419             fvec sample = samples[i];
420             int dim = sample.size();
421             fvec res = regressor->Test(sample);
422             float error = fabs(res[0] - sample[outputDim]);
423             trainErrors.push_back(error);
424         }
425         regressor->trainErrors = trainErrors;
426         regressor->testErrors.clear();
427     }
428     else
429     {
430         int trainCnt = (int)(samples.size()*trainRatio);
431         int testCnt = samples.size() - trainCnt;
432         u32 *perm = randPerm(samples.size());
433         vector<fvec> trainSamples, testSamples;
434         ivec trainLabels, testLabels;
435         if(trainList.size())
436         {
437             FOR(i, trainList.size())
438             {
439                 if(trainList[i])
440                 {
441                     trainSamples.push_back(samples[i]);
442                     trainLabels.push_back(labels[i]);
443                 }
444                 else
445                 {
446                     testSamples.push_back(samples[i]);
447                     testLabels.push_back(labels[i]);
448                 }
449             }
450         }
451         else
452         {
453             trainSamples.resize(trainCnt);
454             trainLabels.resize(trainCnt);
455             testSamples.resize(testCnt);
456             testLabels.resize(testCnt);
457             FOR(i, trainCnt)
458             {
459                 trainSamples[i] = samples[perm[i]];
460                 trainLabels[i] = labels[perm[i]];
461             }
462             FOR(i, testCnt)
463             {
464                 testSamples[i] = samples[perm[i+trainCnt]];
465                 testLabels[i] = labels[perm[i+trainCnt]];
466             }
467         }
468         regressor->Train(trainSamples, trainLabels);
469
470         FOR(i, trainSamples.size())
471         {
472             fvec sample = trainSamples[i];
473             int dim = sample.size();
474             fvec res = regressor->Test(sample);
475             float error = fabs(res[0] - sample[outputDim]);
476             trainErrors.push_back(error);
477         }
478         FOR(i, testSamples.size())
479         {
480             fvec sample = testSamples[i];
481             int dim = sample.size();
482             fvec res = regressor->Test(sample);
483             float error = fabs(res[0] - sample[outputDim]);
484             testErrors.push_back(error);
485         }
486         regressor->trainErrors = trainErrors;
487         regressor->testErrors = testErrors;
488         KILL(perm);
489     }
490     bIsCrossNew = true;
491 }
492
493 // returns, respectively: the mean velocity reconstruction error on the training samples, the mean distance to the trajectory endpoint when following the learned dynamics from each trajectory start, and the mean distance to the closest endpoint when starting from random positions
494 fvec MLDemos::Train(Dynamical *dynamical)
495 {
496     if(!dynamical) return fvec();
497     vector<fvec> samples = canvas->data->GetSamples();
498     vector<ipair> sequences = canvas->data->GetSequences();
499     ivec labels = canvas->data->GetLabels();
500     if(!samples.size() || !sequences.size()) return fvec();
501     int dim = samples[0].size();
502     int count = optionsDynamic->resampleSpin->value();
503     int resampleType = optionsDynamic->resampleCombo->currentIndex();
504     int centerType = optionsDynamic->centerCombo->currentIndex();
505     bool zeroEnding = optionsDynamic->zeroCheck->isChecked();
506
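    // per-trajectory labels, taken from the first sample of each recorded sequence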
507     ivec trajLabels(sequences.size());
508     FOR(i, sequences.size())
509     {
510         trajLabels[i] = canvas->data->GetLabel(sequences[i].first);
511     }
512
513     //float dT = 10.f; // time span between each data frame
514     float dT = optionsDynamic->dtSpin->value();
515     dynamical->dT = dT;
516     //dT = 10.f;
517     vector< vector<fvec> > trajectories = canvas->data->GetTrajectories(resampleType, count, centerType, dT, zeroEnding);
518     interpolate(trajectories[0],count);
519
520     dynamical->Train(trajectories, trajLabels);
521     return Test(dynamical, trajectories, trajLabels);
522 }
523
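// Trains a clusterer either on the manually selected samples (trainList) or on the whole dataset;
// clustering is unsupervised, so no labels are passed.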
524 void MLDemos::Train(Clusterer *clusterer, bvec trainList)
525 {
526     if(!clusterer) return;
527     if(trainList.size())
528     {
529         vector<fvec> trainSamples;
530         FOR(i, trainList.size())
531         {
532             if(trainList[i])
533             {
534                 trainSamples.push_back(canvas->data->GetSample(i));
535             }
536         }
537         clusterer->Train(trainSamples);
538     }
539     else clusterer->Train(canvas->data->GetSamples());
540 }
541
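// Trains a projector on the manually selected samples or on the whole dataset; labels are forwarded
// for projectors that can make use of them.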
542 void MLDemos::Train(Projector *projector, bvec trainList)
543 {
544     if(!projector) return;
545     if(trainList.size())
546     {
547         vector<fvec> trainSamples;
548         ivec trainLabels;
549         FOR(i, trainList.size())
550         {
551             if(trainList[i])
552             {
553                 trainSamples.push_back(canvas->data->GetSample(i));
554                 trainLabels.push_back(canvas->data->GetLabel(i));
555             }
556         }
557         projector->Train(trainSamples, trainLabels);
558     }
559     else projector->Train(canvas->data->GetSamples(), canvas->data->GetLabels());
560 }
561
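// Trains a maximizer on the reward map painted on the canvas: the blue channel of the reward pixmap
// is converted to a reward value in [0,1] per pixel and normalized, and the last canvas target (if any),
// expressed in normalized canvas coordinates, is used as the starting point.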
562 void MLDemos::Train(Maximizer *maximizer)
563 {
564     if(!maximizer) return;
565     if(canvas->maps.reward.isNull()) return;
566     QImage rewardImage = canvas->maps.reward.toImage();
567     QRgb *pixels = (QRgb*) rewardImage.bits();
568     int w = rewardImage.width();
569     int h = rewardImage.height();
570     float *data = new float[w*h];
571
572     float maxData = 0;
573     FOR(i, w*h)
574     {
575         data[i] = 1.f - qBlue(pixels[i])/255.f; // all data is in a 0-1 range
576         maxData = max(maxData, data[i]);
577     }
578     if(maxData > 0)
579     {
580         FOR(i, w*h) data[i] /= maxData; // we ensure that the data is normalized
581     }
582     fvec startingPoint;
583     if(canvas->targets.size())
584     {
585         startingPoint = canvas->targets[canvas->targets.size()-1];
586         QPointF starting = canvas->toCanvasCoords(startingPoint);
587         startingPoint[0] = starting.x()/w;
588         startingPoint[1] = starting.y()/h;
589     }
590     maximizer->Train(data, fVec(w,h), startingPoint);
591     maximizer->age = 0;
592     delete [] data;
593 }
594
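// Runs the maximizer, repeatedly evaluating its current maximum, until it reaches its maximum age
// or its stopping value.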
595 void MLDemos::Test(Maximizer *maximizer)
596 {
597     if(!maximizer) return;
598     do
599     {
600         fvec sample = maximizer->Test(maximizer->Maximum());
601         maximizer->age++;
602     }
603     while(maximizer->age < maximizer->maxAge && maximizer->MaximumValue() < maximizer->stopValue);
604 }
605
606 // returns, respectively: the mean velocity reconstruction error on the training samples, the mean distance to the trajectory endpoint when following the learned dynamics from each trajectory start, and the mean distance to the closest endpoint when starting from random positions
607 fvec MLDemos::Test(Dynamical *dynamical, vector< vector<fvec> > trajectories, ivec labels)
608 {
609     if(!dynamical || !trajectories.size()) return fvec();
610     int dim = trajectories[0][0].size()/2;
611     //int dim = dynamical->Dim();
612     float dT = dynamical->dT;
613     fvec sample; sample.resize(dim,0);
614     fvec vTrue; vTrue.resize(dim, 0);
615     fvec xMin(dim, FLT_MAX);
616     fvec xMax(dim, -FLT_MAX);
617
618     // test each trajectory for errors
619     int errorCnt=0;
620     float errorOne = 0, errorAll = 0;
621     FOR(i, trajectories.size())
622     {
623         vector<fvec> t = trajectories[i];
624         float errorTraj = 0;
625         FOR(j, t.size())
626         {
627             FOR(d, dim)
628             {
629                 sample[d] = t[j][d];
630                 vTrue[d] = t[j][d+dim];
631                 if(xMin[d] > sample[d]) xMin[d] = sample[d];
632                 if(xMax[d] < sample[d]) xMax[d] = sample[d];
633             }
634             fvec v = dynamical->Test(sample);
635             float error = 0;
636             FOR(d, dim) error += (v[d] - vTrue[d])*(v[d] - vTrue[d]);
637             errorTraj += error;
638             errorCnt++;
639         }
640         errorOne += errorTraj;
641         errorAll += errorTraj / t.size();
642     }
643     errorOne /= errorCnt;
644     errorAll /= trajectories.size();
645     fvec res;
646     res.push_back(errorOne);
647
648     vector<fvec> endpoints;
649
650     float errorTarget = 0;
651     // test each trajectory for target
652     fvec pos(dim), end(dim);
653     FOR(i, trajectories.size())
654     {
655         FOR(d, dim)
656         {
657             pos[d] = trajectories[i].front()[d];
658             end[d] = trajectories[i].back()[d];
659         }
660         if(!endpoints.size()) endpoints.push_back(end);
661         else
662         {
663             bool bExists = false;
664             FOR(j, endpoints.size())
665             {
666                 if(endpoints[j] == end)
667                 {
668                     bExists = true;
669                     break;
670                 }
671             }
672             if(!bExists) endpoints.push_back(end);
673         }
674         int steps = 500;
675         float eps = FLT_MIN;
676         FOR(j, steps)
677         {
678             fvec v = dynamical->Test(pos);
679             float speed = 0;
680             FOR(d, dim) speed += v[d]*v[d];
681             speed = sqrtf(speed);
682             if(speed*dT < eps) break;
683             pos += v*dT;
684         }
685         float error = 0;
686         FOR(d, dim)
687         {
688             error += (pos[d] - end[d])*(pos[d] - end[d]);
689         }
690         error = sqrtf(error);
691         errorTarget += error;
692     }
693     errorTarget /= trajectories.size();
694     res.push_back(errorTarget);
695
696     fvec xDiff = xMax - xMin;
697     errorTarget = 0;
698     int testCount = 100;
699     FOR(i, testCount)
700     {
701         FOR(d, dim)
702         {
703             pos[d] = ((drand48()*2 - 0.5)*xDiff[d] + xMin[d]);
704         }
705
706         int steps = 500;
707         float eps = FLT_MIN;
708         FOR(j, steps)
709         {
710             fvec v = dynamical->Test(pos);
711             float speed = 0;
712             FOR(d, dim) speed += v[d]*v[d];
713             speed = sqrtf(speed);
714             if(speed*dT < eps) break;
715             pos += v*dT;
716         }
717         float minError = FLT_MAX;
718         FOR(j, endpoints.size())
719         {
720             float error = 0;
721             FOR(d, dim)
722             {
723                 error += (pos[d] - endpoints[j][d])*(pos[d] - endpoints[j][d]);
724             }
725             error = sqrtf(error);
726             if(minError > error) minError = error;
727         }
728         errorTarget += minError;
729     }
730     errorTarget /= testCount;
731     res.push_back(errorTarget);
732
733     return res;
734 }
735
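// Runs the algorithm comparison. Each entry of compareOptions is a textual descriptor whose first line
// names the problem type and the algorithm tab ("Classification:<tab>", "Regression:<tab>",
// "Dynamical:<tab>" or "Maximization:<tab>") and whose remaining lines hold "paramName paramValue"
// pairs; a hypothetical example would be "Classification:0\nkernelWidth 0.1". Each algorithm is
// trained and tested over the requested number of folds and its scores are added to the compare dialog.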
736 void MLDemos::Compare()
737 {
738     if(!canvas) return;
739     if(!compareOptions.size()) return;
740
741     QMutexLocker lock(&mutex);
742     drawTimer->Stop();
743     DEL(clusterer);
744     DEL(regressor);
745     DEL(dynamical);
746     DEL(classifier);
747     DEL(maximizer);
748     DEL(projector);
749     // we start parsing the algorithm list
750     int folds = optionsCompare->foldCountSpin->value();
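    // train/test split ratios indexed by the traintestRatioCombo selection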
751     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
752     int ratioIndex = optionsCompare->traintestRatioCombo->currentIndex();
753     float trainRatio = ratios[ratioIndex];
754     //int positive = optionsCompare->positiveSpin->value();
755     int positive = 1;
756
757     compare->Clear();
758
759     QProgressDialog progress("Comparing Algorithms", "cancel", 0, folds*compareOptions.size());
760     progress.show();
761     FOR(i, compareOptions.size())
762     {
763         QString string = compareOptions[i];
764         QTextStream stream(&string);
765         QString line = stream.readLine();
766         QString paramString = stream.readAll();
767         if(line.startsWith("Maximization"))
768         {
769             QStringList s = line.split(":");
770             int tab = s[1].toInt();
771             if(tab >= maximizers.size() || !maximizers[tab]) continue;
772             QTextStream paramStream(&paramString);
773             QString paramName;
774             float paramValue;
775             while(!paramStream.atEnd())
776             {
777                 paramStream >> paramName;
778                 paramStream >> paramValue;
779                 maximizers[tab]->LoadParams(paramName, paramValue);
780             }
781             QString algoName = maximizers[tab]->GetAlgoString();
782             fvec resultIt, resultVal, resultEval;
783             FOR(f, folds)
784             {
785                 maximizer = maximizers[tab]->GetMaximizer();
786                 if(!maximizer) continue;
787                 maximizer->maxAge = optionsMaximize->iterationsSpin->value();
788                 maximizer->stopValue = optionsMaximize->stoppingSpin->value();
789                 Train(maximizer);
790                 Test(maximizer);
791                 resultIt.push_back(maximizer->age);
792                 resultVal.push_back(maximizer->MaximumValue());
793                 resultEval.push_back(maximizer->Evaluations());
794                 progress.setValue(f + i*folds);
795                 DEL(maximizer);
796                 qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
797                 if(progress.wasCanceled())
798                 {
799                     compare->AddResults(resultEval, "Evaluations", algoName);
800                     compare->AddResults(resultVal, "Reward", algoName);
801                     compare->AddResults(resultIt, "Iterations", algoName);
802                     compare->Show();
803                     return;
804                 }
805             }
806             compare->AddResults(resultEval, "Evaluations", algoName);
807             compare->AddResults(resultVal, "Reward", algoName);
808             compare->AddResults(resultIt, "Iterations", algoName);
809         }
810         if(line.startsWith("Classification"))
811         {
812             QStringList s = line.split(":");
813             int tab = s[1].toInt();
814             if(tab >= classifiers.size() || !classifiers[tab]) continue;
815             QTextStream paramStream(&paramString);
816             QString paramName;
817             float paramValue;
818             while(!paramStream.atEnd())
819             {
820                 paramStream >> paramName;
821                 paramStream >> paramValue;
822                 classifiers[tab]->LoadParams(paramName, paramValue);
823             }
824             QString algoName = classifiers[tab]->GetAlgoString();
825             fvec fmeasureTrain, fmeasureTest, errorTrain, errorTest, precisionTrain, precisionTest, recallTrain, recallTest;
826
827             bvec trainList;
828             if(optionsClassify->manualTrainButton->isChecked())
829             {
830                 // we get the list of samples that are checked
831                 trainList = GetManualSelection();
832             }
833
834             map<int,int> classes;
835             FOR(j, canvas->data->GetLabels().size()) classes[canvas->data->GetLabels()[j]]++;
836
837             FOR(f, folds)
838             {
839                 classifier = classifiers[tab]->GetClassifier();
840                 if(!classifier) continue;
841                 Train(classifier, positive, trainRatio, trainList);
842                 bool bMulti = classifier->IsMultiClass() && DatasetManager::GetClassCount(canvas->data->GetLabels());
843                 if(classifier->rocdata.size()>0)
844                 {
845                     if(!bMulti || classes.size() <= 2)
846                     {
847                         fvec res = GetBestFMeasure(classifier->rocdata[0]);
848                         fmeasureTrain.push_back(res[0]);
849                         precisionTrain.push_back(res[1]);
850                         recallTrain.push_back(res[2]);
851                     }
852                     else
853                     {
854                         int errors = 0;
855                         std::vector<f32pair> rocdata = classifier->rocdata[0];
856                         FOR(j, rocdata.size())
857                         {
858                             if(rocdata[j].first != rocdata[j].second)
859                             {
860                                 if(classes.size() > 2) errors++;
861                                 else if((rocdata[j].first < 0) != rocdata[j].second) errors++;
862                             }
863                         }
864                         if(classes.size() <= 2)
865                         {
866                             float e = min(errors,(int)rocdata.size()-errors)/(float)rocdata.size();
867                             fmeasureTrain.push_back(1-e);
868                             fmeasureTest.push_back(1-e);
869                         }
870                         else
871                         {
872                             errorTrain.push_back(errors/(float)rocdata.size());
873                             errorTest.push_back(errors/(float)rocdata.size());
874                         }
875                     }
876                 }
877                 if(classifier->rocdata.size()>1)
878                 {
879                     if(!bMulti || classes.size() <= 2)
880                     {
881                         fvec res = GetBestFMeasure(classifier->rocdata[1]);
882                         fmeasureTest.push_back(res[0]);
883                         precisionTest.push_back(res[1]);
884                         recallTest.push_back(res[2]);
885                     }
886                     else
887                     {
888                         int errors = 0;
889                         std::vector<f32pair> rocdata = classifier->rocdata[1];
890                         FOR(j, rocdata.size())
891                         {
892                             if(rocdata[j].first != rocdata[j].second)
893                             {
894                                 if(classes.size() > 2) errors++;
895                                 else if((rocdata[j].first < 0) != rocdata[j].second) errors++;
896                             }
897                         }
898                         if(classes.size() <= 2) errorTest.push_back(min(errors,(int)rocdata.size()-errors)/(float)rocdata.size());
899                         else errorTest.push_back(errors/(float)rocdata.size());
900                     }
901                 }
902                 DEL(classifier);
903
904                 progress.setValue(f + i*folds);
905                 qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
906                 if(progress.wasCanceled())
907                 {
908                     compare->AddResults(fmeasureTest,   "F-Measure (Test)", algoName);
909                     compare->AddResults(errorTest,      "Error (Test)", algoName);
910                     compare->AddResults(precisionTest,  "Precision (Test)", algoName);
911                     compare->AddResults(recallTest,     "Recall (Test)", algoName);
912                     compare->AddResults(fmeasureTrain,  "F-Measure (Training)", algoName);
913                     compare->AddResults(errorTrain,     "Error (Training)", algoName);
914                     compare->AddResults(precisionTrain, "Precision (Training)", algoName);
915                     compare->AddResults(recallTrain,    "Recall (Training)", algoName);
916                     //compare->SetActiveResult(1);
917                     compare->Show();
918                     return;
919                 }
920             }
921             compare->AddResults(fmeasureTest,   "F-Measure (Test)", algoName);
922             compare->AddResults(errorTest,      "Error (Test)", algoName);
923             compare->AddResults(precisionTest,  "Precision (Test)", algoName);
924             compare->AddResults(recallTest,     "Recall (Test)", algoName);
925             compare->AddResults(fmeasureTrain,  "F-Measure (Training)", algoName);
926             compare->AddResults(errorTrain,     "Error (Training)", algoName);
927             compare->AddResults(precisionTrain, "Precision (Training)", algoName);
928             compare->AddResults(recallTrain,    "Recall (Training)", algoName);
929             //compare->SetActiveResult(1);
930         }
931         if(line.startsWith("Regression"))
932         {
933             QStringList s = line.split(":");
934             int tab = s[1].toInt();
935             if(tab >= regressors.size() || !regressors[tab]) continue;
936             int outputDim = optionsCompare->outputDimCombo->currentIndex();
937             QTextStream paramStream(&paramString);
938             QString paramName;
939             float paramValue;
940             while(!paramStream.atEnd())
941             {
942                 paramStream >> paramName;
943                 paramStream >> paramValue;
944                 regressors[tab]->LoadParams(paramName, paramValue);
945             }
946             bvec trainList;
947             if(optionsRegress->manualTrainButton->isChecked())
948             {
949                 // we get the list of samples that are checked
950                 trainList = GetManualSelection();
951             }
952
953             QString algoName = regressors[tab]->GetAlgoString();
954             fvec resultTrain, resultTest;
955             FOR(f, folds)
956             {
957                 regressor = regressors[tab]->GetRegressor();
958                 if(!regressor) continue;
959                 Train(regressor, outputDim, trainRatio, trainList);
960                 if(regressor->trainErrors.size())
961                 {
962                     float error = 0.f;
963                     FOR(i, regressor->trainErrors.size()) error += regressor->trainErrors[i];
964                     error /= regressor->trainErrors.size();
965                     resultTrain.push_back(error);
966                 }
967                 if(regressor->testErrors.size())
968                 {
969                     float error = 0.f;
970                     FOR(i, regressor->testErrors.size()) error += regressor->testErrors[i];
971                     error /= regressor->testErrors.size();
972                     resultTest.push_back(error);
973                 }
974                 DEL(regressor);
975
976                 progress.setValue(f + i*folds);
977                 qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
978                 if(progress.wasCanceled())
979                 {
980                     compare->AddResults(resultTest, "Error (Testing)", algoName);
981                     compare->AddResults(resultTrain, "Error (Training)", algoName);
982                     compare->Show();
983                     return;
984                 }
985             }
986             compare->AddResults(resultTest, "Error (Testing)", algoName);
987             compare->AddResults(resultTrain, "Error (Training)", algoName);
988         }
989         if(line.startsWith("Dynamical"))
990         {
991             QStringList s = line.split(":");
992             int tab = s[1].toInt();
993             if(tab >= dynamicals.size() || !dynamicals[tab]) continue;
994             QTextStream paramStream(&paramString);
995             QString paramName;
996             float paramValue;
997             while(!paramStream.atEnd())
998             {
999                 paramStream >> paramName;
1000                 paramStream >> paramValue;
1001                 dynamicals[tab]->LoadParams(paramName, paramValue);
1002             }
1003             QString algoName = dynamicals[tab]->GetAlgoString();
1004             fvec resultReconst, resultTargetTraj, resultTarget;
1005             FOR(f, folds)
1006             {
1007                 dynamical = dynamicals[tab]->GetDynamical();
1008                 if(!dynamical) continue;
1009                 fvec results = Train(dynamical);
1010                 if(results.size())
1011                 {
1012                     resultReconst.push_back(results[0]);
1013                     resultTargetTraj.push_back(results[1]);
1014                     resultTarget.push_back(results[2]);
1015                 }
1016                 DEL(dynamical);
1017
1018                 progress.setValue(f + i*folds);
1019                 qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
1020                 if(progress.wasCanceled())
1021                 {
1022                     compare->AddResults(resultReconst, "Reconstruction Error", algoName);
1023                     compare->AddResults(resultTargetTraj, "Target Error (trajectories)", algoName);
1024                     compare->AddResults(resultTarget, "Target Error (random points)", algoName);
1025                     compare->Show();
1026                     return;
1027                 }
1028             }
1029             compare->AddResults(resultReconst, "Reconstruction Error", algoName);
1030             compare->AddResults(resultTargetTraj, "Target Error (trajectories)", algoName);
1031             compare->AddResults(resultTarget, "Target Error (random points)", algoName);
1032         }
1033         compare->Show();
1034     }
1035 }