- Integrated the file import into the main interface (not as a plugin anymore)
[mldemos:mldemos.git] / MLDemos / mlprocessing.cpp
1 /*********************************************************************
2 MLDemos: A User-Friendly visualization toolkit for machine learning
3 Copyright (C) 2010  Basilio Noris
4 Contact: mldemos@b4silio.com
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free
18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *********************************************************************/
20 #include "mldemos.h"
21 #include "basicMath.h"
22 #include "classifier.h"
23 #include "regressor.h"
24 #include "dynamical.h"
25 #include "clusterer.h"
26 #include "maximize.h"
27 #include "roc.h"
28 #include <QDebug>
29 #include <fstream>
30 #include <QPixmap>
31 #include <QBitmap>
32 #include <QSettings>
33 #include <QFileDialog>
34 #include <QProgressDialog>
35
36 using namespace std;
37
38 void MLDemos::Classify()
39 {
40     if(!canvas || !canvas->data->GetCount()) return;
41     drawTimer->Stop();
42         drawTimer->Clear();
43         mutex.lock();
44     DEL(clusterer);
45     DEL(regressor);
46     DEL(dynamical);
47     DEL(classifier);
48         DEL(maximizer);
49     DEL(projector);
50     lastTrainingInfo = "";
51     int tab = optionsClassify->tabWidget->currentIndex();
52     if(tab >= classifiers.size() || !classifiers[tab]) return;
53
54     classifier = classifiers[tab]->GetClassifier();
55     tabUsedForTraining = tab;
56     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
57     int ratioIndex = optionsClassify->traintestRatioCombo->currentIndex();
58     float trainRatio = ratios[ratioIndex];
59     int positive = optionsClassify->positiveSpin->value();
60     vector<bool> trainList;
61     if(optionsClassify->manualTrainButton->isChecked())
62     {
63         // we get the list of samples that are checked
64         trainList = GetManualSelection();
65     }
66
67     bool trained = Train(classifier, positive, trainRatio, trainList);
68     if(trained)
69     {
70                 classifiers[tab]->Draw(canvas, classifier);
71                 UpdateInfo();
72                 if(drawTimer && classifier->UsesDrawTimer())
73         {
74             drawTimer->classifier = &this->classifier;
75             drawTimer->start(QThread::NormalPriority);
76         }
77         if(canvas->canvasType) CanvasOptionsChanged();
78         // we fill in the canvas sampleColors
79         vector<fvec> samples = canvas->data->GetSamples();
80         canvas->sampleColors.resize(samples.size());
81         FOR(i, samples.size())
82         {
83             canvas->sampleColors[i] = DrawTimer::GetColor(classifier, samples[i]);
84         }
85         if(canvas->canvasType)
86         {
87             canvas->maps.model = QPixmap();
88             CanvasOptionsChanged();
89         }
90         canvas->repaint();
91     }
92     else
93     {
94         mutex.unlock();
95         Clear();
96                 mutex.lock();
97                 UpdateInfo();
98         }
99         mutex.unlock();
100 }
101
102 void MLDemos::ClassifyCross()
103 {
104     if(!canvas || !canvas->data->GetCount()) return;
105     drawTimer->Stop();
106     QMutexLocker lock(&mutex);
107     DEL(clusterer);
108     DEL(regressor);
109     DEL(dynamical);
110     DEL(classifier);
111         DEL(maximizer);
112     DEL(projector);
113     lastTrainingInfo = "";
114     int tab = optionsClassify->tabWidget->currentIndex();
115     if(tab >= classifiers.size() || !classifiers[tab]) return;
116     tabUsedForTraining = tab;
117
118     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
119     int ratioIndex = optionsClassify->traintestRatioCombo->currentIndex();
120     float trainRatio = ratios[ratioIndex];
121     int positive = optionsClassify->positiveSpin->value();
122     int foldCount = optionsClassify->foldCountSpin->value();
123     vector<bool> trainList;
124     if(optionsClassify->manualTrainButton->isChecked())
125     {
126         // we get the list of samples that are checked
127         trainList = GetManualSelection();
128     }
129
130     vector<fvec> fmeasures;
131     fmeasures.resize(2);
132     bool trained = false;
133     FOR(f,foldCount)
134     {
135         DEL(classifier);
136         classifier = classifiers[tab]->GetClassifier();
137         trained = Train(classifier, positive, trainRatio, trainList);
138         if(!trained) break;
139         if(classifier->rocdata.size()>0)
140         {
141             fmeasures[0].push_back(GetBestFMeasure(classifier->rocdata[0])[0]);
142         }
143         if(classifier->rocdata.size()>1)
144         {
145             fmeasures[1].push_back(GetBestFMeasure(classifier->rocdata[1])[0]);
146         }
147     }
148     classifier->crossval = fmeasures;
149     ShowCross();
150     //if(trained) classifiers[tab]->Draw(canvas, classifier);
151     DEL(classifier);
152     UpdateInfo();
153 }
154
155 vector<bool> MLDemos::GetManualSelection()
156 {
157     vector<bool> trainList;
158     if(!canvas || !canvas->data->GetCount()) return trainList;
159     trainList.resize(manualSelection->sampleList->count(), false);
160     QList<QListWidgetItem*> selected = manualSelection->sampleList->selectedItems();
161     if(!selected.size()) // if nothing is selected we use all samples as training
162     {
163         trainList = vector<bool>(canvas->data->GetCount(), true);
164         return trainList;
165     }
166     FOR(i, selected.size())
167     {
168         int index = manualSelection->sampleList->row(selected[i]);
169         trainList[index] = true;
170     }
171     return trainList;
172 }
173
174 ivec MLDemos::GetInputDimensions()
175 {
176     if(!canvas || !canvas->data->GetCount()) return ivec();
177     QList<QListWidgetItem*> selected = inputDimensions->dimList->selectedItems();
178     if(!selected.size() || selected.size() == inputDimensions->dimList->count()) return ivec(); // if nothing is selected we use all dimensions for training
179     ivec dimList(selected.size());
180     FOR(i, selected.size())
181     {
182         dimList[i] = inputDimensions->dimList->row(selected[i]);
183     }
184     return dimList;
185 }
186
187 void MLDemos::Regression()
188 {
189     if(!canvas || !canvas->data->GetCount()) return;
190     drawTimer->Stop();
191         drawTimer->Clear();
192
193     QMutexLocker lock(&mutex);
194     DEL(clusterer);
195     DEL(regressor);
196     DEL(dynamical);
197     DEL(classifier);
198         DEL(maximizer);
199     DEL(projector);
200     lastTrainingInfo = "";
201     int tab = optionsRegress->tabWidget->currentIndex();
202     if(tab >= regressors.size() || !regressors[tab]) return;
203     int outputDim = optionsRegress->outputDimCombo->currentIndex();
204     ivec inputDims = GetInputDimensions();
205     //ivec inputDims = optionsRegress->inputDimButton->isChecked() ? GetInputDimensions() : ivec();
206      if(inputDims.size()==1 && inputDims[0] == outputDim) return;
207
208     int outputIndexInList = -1;
209     FOR(i, inputDims.size()) if(outputDim == inputDims[i])
210     {
211         outputIndexInList = i;
212         break;
213     }
214
215     regressor = regressors[tab]->GetRegressor();
216     tabUsedForTraining = tab;
217
218     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
219     int ratioIndex = optionsRegress->traintestRatioCombo->currentIndex();
220     float trainRatio = ratios[ratioIndex];
221
222     vector<bool> trainList;
223     if(optionsRegress->manualTrainButton->isChecked())
224     {
225         // we get the list of samples that are checked
226         trainList = GetManualSelection();
227     }
228
229     Train(regressor, outputDim, trainRatio, trainList);
230     if(outputDim != -1)
231     {
232         ui.canvasX2Spin->setValue(outputDim+1);
233         DisplayOptionChanged();
234     }
235     regressors[tab]->Draw(canvas, regressor);
236
237     // here we draw the errors for each sample
238     if(canvas->data->GetDimCount() > 2 && canvas->canvasType == 0)
239     {
240         vector<fvec> samples = canvas->data->GetSamples();
241         vector<fvec> subsamples = canvas->data->GetSampleDims(inputDims, outputIndexInList==-1 ? outputDim : -1);
242         ivec labels = canvas->data->GetLabels();
243         QPainter painter(&canvas->maps.model);
244         painter.setRenderHint(QPainter::Antialiasing);
245         // we draw the starting sample
246         painter.setOpacity(0.4);
247         painter.setPen(Qt::black);
248         painter.setBrush(Qt::white);
249         FOR(i, samples.size())
250         {
251             fvec sample = samples[i];
252             QPointF point = canvas->toCanvasCoords(sample);
253             painter.drawEllipse(point, 6,6);
254         }
255         // we draw the estimated sample
256         painter.setPen(Qt::white);
257         painter.setBrush(Qt::black);
258         FOR(i, samples.size())
259         {
260             fvec sample = samples[i];
261             fvec estimate = regressor->Test(subsamples[i]);
262             sample[outputDim] = estimate[0];
263             QPointF point2 = canvas->toCanvasCoords(sample);
264             painter.drawEllipse(point2, 5,5);
265         }
266         painter.setOpacity(1);
267         // we draw the error bars
268         FOR(i, samples.size())
269         {
270             fvec sample = samples[i];
271             fvec estimate = regressor->Test(subsamples[i]);
272             QPointF point = canvas->toCanvasCoords(sample);
273             sample[outputDim] = estimate[0];
274             QPointF point2 = canvas->toCanvasCoords(sample);
275             QColor color = SampleColor[labels[i]%SampleColorCnt];
276             if(!labels[i]) color = Qt::black;
277             painter.setPen(QPen(color, 1));
278             painter.drawLine(point, point2);
279         }
280     }
281     UpdateInfo();
282 }
283
284 void MLDemos::RegressionCross()
285 {
286     if(!canvas || !canvas->data->GetCount()) return;
287     drawTimer->Stop();
288         drawTimer->Clear();
289         QMutexLocker lock(&mutex);
290     DEL(clusterer);
291     DEL(regressor);
292     DEL(dynamical);
293     DEL(classifier);
294         DEL(maximizer);
295     DEL(projector);
296     lastTrainingInfo = "";
297     int tab = optionsRegress->tabWidget->currentIndex();
298     if(tab >= regressors.size() || !regressors[tab]) return;
299     int outputDim = optionsRegress->outputDimCombo->currentIndex();
300     regressor = regressors[tab]->GetRegressor();
301     tabUsedForTraining = tab;
302
303     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
304     int ratioIndex = optionsRegress->traintestRatioCombo->currentIndex();
305     float trainRatio = ratios[ratioIndex];
306     int foldCount = optionsRegress->foldCountSpin->value();
307
308     vector<fvec> errors;
309     errors.resize(2);
310     FOR(f,foldCount)
311     {
312         DEL(regressor);
313         regressor = regressors[tab]->GetRegressor();
314         Train(regressor, trainRatio);
315         if(regressor->trainErrors.size())
316         {
317             errors[0] = regressor->trainErrors;
318         }
319         if(regressor->testErrors.size())
320         {
321             errors[1] = regressor->testErrors;
322         }
323     }
324     regressor->crossval = errors;
325     ShowCross();
326
327     Train(regressor, outputDim, trainRatio);
328     regressors[tab]->Draw(canvas, regressor);
329         UpdateInfo();
330 }
331
332 void MLDemos::Dynamize()
333 {
334     if(!canvas || !canvas->data->GetCount() || !canvas->data->GetSequences().size()) return;
335     drawTimer->Stop();
336         drawTimer->Clear();
337     QMutexLocker lock(&mutex);
338     DEL(clusterer);
339     DEL(regressor);
340     DEL(dynamical);
341     DEL(classifier);
342         DEL(maximizer);
343     DEL(projector);
344     lastTrainingInfo = "";
345     int tab = optionsDynamic->tabWidget->currentIndex();
346     if(tab >= dynamicals.size() || !dynamicals[tab]) return;
347     dynamical = dynamicals[tab]->GetDynamical();
348     tabUsedForTraining = tab;
349
350     Train(dynamical);
351     dynamicals[tab]->Draw(canvas,dynamical);
352
353         int w = canvas->width(), h = canvas->height();
354
355         int resampleType = optionsDynamic->resampleCombo->currentIndex();
356         int resampleCount = optionsDynamic->resampleSpin->value();
357         int centerType = optionsDynamic->centerCombo->currentIndex();
358         float dT = optionsDynamic->dtSpin->value();
359         int zeroEnding = optionsDynamic->zeroCheck->isChecked();
360         bool bColorMap = optionsDynamic->colorCheck->isChecked();
361
362     // we draw the current trajectories
363         vector< vector<fvec> > trajectories = canvas->data->GetTrajectories(resampleType, resampleCount, centerType, dT, zeroEnding);
364     vector< vector<fvec> > testTrajectories;
365     int steps = 300;
366         if(trajectories.size())
367         {
368         testTrajectories.resize(trajectories.size());
369         int dim = trajectories[0][0].size() / 2;
370         FOR(i, trajectories.size())
371         {
372             fvec start(dim,0);
373             FOR(d, dim) start[d] = trajectories[i][0][d];
374             vector<fvec> result = dynamical->Test(start, steps);
375             testTrajectories[i] = result;
376         }
377         canvas->maps.model = QPixmap(w,h);
378         QBitmap bitmap(w,h);
379         bitmap.clear();
380         canvas->maps.model.setMask(bitmap);
381         canvas->maps.model.fill(Qt::transparent);
382
383         if(canvas->canvasType == 0) // standard canvas
384         {
385             QPainter painter(&canvas->maps.model);
386             painter.setRenderHint(QPainter::Antialiasing);
387             FOR(i, testTrajectories.size())
388             {
389                 vector<fvec> &result = testTrajectories[i];
390                 fvec oldPt = result[0];
391                 int count = result.size();
392                 FOR(j, count-1)
393                 {
394                     fvec pt = result[j+1];
395                     painter.setPen(QPen(Qt::green, 2));
396                     painter.drawLine(canvas->toCanvasCoords(pt), canvas->toCanvasCoords(oldPt));
397                     oldPt = pt;
398                 }
399                 painter.setBrush(Qt::NoBrush);
400                 painter.setPen(Qt::green);
401                 painter.drawEllipse(canvas->toCanvasCoords(result[0]), 5, 5);
402                 painter.setPen(Qt::red);
403                 painter.drawEllipse(canvas->toCanvasCoords(result[count-1]), 5, 5);
404             }
405         }
406         else
407         {
408             pair<fvec,fvec> bounds = canvas->data->GetBounds();
409             Expose::DrawTrajectories(canvas->maps.model, testTrajectories, vector<QColor>(), canvas->canvasType-1, 1, bounds);
410         }
411         }
412
413         // the first index is "none", so we subtract 1
414     int avoidIndex = optionsDynamic->obstacleCombo->currentIndex()-1;
415     if(avoidIndex >=0 && avoidIndex < avoiders.size() && avoiders[avoidIndex])
416     {
417         DEL(dynamical->avoid);
418                 dynamical->avoid = avoiders[avoidIndex]->GetObstacleAvoidance();
419     }
420         UpdateInfo();
421         if(dynamicals[tab]->UsesDrawTimer())
422         {
423                 drawTimer->bColorMap = bColorMap;
424                 drawTimer->start(QThread::NormalPriority);
425         }
426 }
427
428 void MLDemos::Avoidance()
429 {
430     if(!canvas || !dynamical) return;
431     drawTimer->Stop();
432     QMutexLocker lock(&mutex);
433         // the first index is "none", so we subtract 1
434         int index = optionsDynamic->obstacleCombo->currentIndex()-1;
435     if(index >=0 && index >= avoiders.size() || !avoiders[index]) return;
436     DEL(dynamical->avoid);
437     dynamical->avoid = avoiders[index]->GetObstacleAvoidance();
438         UpdateInfo();
439         drawTimer->Clear();
440     drawTimer->start(QThread::NormalPriority);
441 }
442
443 fvec ClusterMetrics(std::vector<fvec> samples, ivec labels, std::vector<fvec> scores, float ratio = 1.f)
444 {
445     fvec results(4, 0);
446     results[0] = drand48();
447     if(!samples.size() || !scores.size()) return results;
448     int dim = samples[0].size();
449     int nbClusters = scores[0].size();
450     int count = samples.size();
451     // compute bic
452     double loglik = 0;
453
454     vector<fvec> means(nbClusters);
455     FOR(k, nbClusters)
456     {
457         means[k] = fvec(dim, 0);
458         float contrib = 0;
459         FOR(i, count)
460         {
461             contrib += scores[i][k];
462             means[k] += samples[i]*scores[i][k];
463         }
464         means[k] /= contrib;
465     }
466
467     float log_lik=0;
468     float like;
469     float *pxi = new float[nbClusters];
470     int data_i=0;
471     int state_i;
472
473     fvec loglikes(nbClusters);
474     FOR(k, nbClusters)
475     {
476         float rss = 0;
477         double contrib = 0;
478         FOR(i, count)
479         {
480             contrib += scores[i][k];
481             if(contrib==0) continue;
482             fvec diff = samples[i]-means[k];
483             rss += diff*diff*scores[i][k];
484         }
485         loglikes[k] = rss;
486     }
487     FOR(k, nbClusters) loglik += loglikes[k];
488     //loglik /= nbClusters;
489
490     results[0] = loglik; // RSS
491     results[1] = log(count)*nbClusters + loglik; // BIC
492     results[2] = 2*nbClusters + loglik; // AIC
493
494
495     // we compute the f-measures for each class
496     map<int,int> classcounts;
497     int cnt = 0;
498     FOR(i, labels.size()) if(!classcounts.count(labels[i])) classcounts[labels[i]] = cnt++;
499     int classCount = classcounts.size();
500     map<int, fvec> classScores;
501     fvec clusterScores(nbClusters);
502     map<int,float> labelScores;
503
504     if(ratio == 1.f)
505     {
506         FOR(i, labels.size())
507         {
508             labelScores[labels[i]] += 1.f;
509             if(!classScores.count(labels[i]))classScores[labels[i]].resize(nbClusters);
510             FOR(k, nbClusters)
511             {
512                 classScores[labels[i]][k] += scores[i][k];
513                 clusterScores[k] += scores[i][k];
514             }
515         }
516     }
517     else
518     {
519         u32 *perm = randPerm(labels.size());
520         map<int, ivec> indices;
521         FOR(i, labels.size()) indices[labels[perm[i]]].push_back(perm[i]);
522         for(map<int,ivec>::iterator it = indices.begin(); it != indices.end(); it++)
523         {
524             int labelCount = max(1,int(it->second.size()*ratio));
525             FOR(i, labelCount)
526             {
527                 labelScores[labels[it->second[i]]] += 1.f;
528                 if(!classScores.count(labels[it->second[i]]))classScores[labels[it->second[i]]].resize(nbClusters);
529                 FOR(k, nbClusters)
530                 {
531                     classScores[labels[it->second[i]]][k] += scores[it->second[i]][k];
532                     clusterScores[k] += scores[it->second[i]][k];
533                 }
534             }
535         }
536         delete [] perm;
537     }
538
539     float fmeasure = 0;
540     map<int,float>::iterator it2 = labelScores.begin();
541     for(map<int,fvec>::iterator it = classScores.begin(); it != classScores.end(); it++, it2++)
542     {
543         float maxScore = -FLT_MAX;
544         FOR(k, nbClusters)
545         {
546             float precision = it->second[k] / it2->second;
547             float recall = it->second[k] / clusterScores[k];
548             float f1 = 2*precision*recall/(precision+recall);
549             maxScore = max(maxScore,f1);
550         }
551         fmeasure += maxScore;
552     }
553     int classAndClusterCount = classCount;
554     // we penalize empty clusters
555     FOR(k, nbClusters) if(clusterScores[k] == 0) classAndClusterCount++; // we have an empty cluster!
556     fmeasure /= classAndClusterCount;
557
558     results[3] = -fmeasure; // F-Measure
559
560     return results;
561 }
562
563
564 void MLDemos::Cluster()
565 {
566     if(!canvas || !canvas->data->GetCount()) return;
567     drawTimer->Stop();
568     QMutexLocker lock(&mutex);
569     DEL(clusterer);
570     DEL(regressor);
571     DEL(dynamical);
572     DEL(classifier);
573         DEL(maximizer);
574     DEL(projector);
575     lastTrainingInfo = "";
576     int tab = optionsCluster->tabWidget->currentIndex();
577     if(tab >= clusterers.size() || !clusterers[tab]) return;
578     clusterer = clusterers[tab]->GetClusterer();
579     tabUsedForTraining = tab;
580     vector<bool> trainList;
581     if(optionsCluster->manualTrainButton->isChecked())
582     {
583         // we get the list of samples that are checked
584         trainList = GetManualSelection();
585     }
586     Train(clusterer, trainList);
587         drawTimer->Stop();
588         drawTimer->Clear();
589     clusterers[tab]->Draw(canvas,clusterer);
590
591     // we compute the stats on the clusters (f-measure, bic etc)
592
593     vector<fvec> samples = canvas->data->GetSamples();
594     ivec labels = canvas->data->GetLabels();
595     vector<fvec> clusterScores(samples.size());
596     FOR(i, canvas->data->GetCount())
597     {
598         fvec result = clusterer->Test(samples[i]);
599         if(clusterer->NbClusters()==1) clusterScores[i] = result;
600         else if(result.size()>1) clusterScores[i] = result;
601         else if(result.size())
602         {
603             fvec res(clusterer->NbClusters(),0);
604             res[result[0]] = 1.f;
605         }
606     }
607
608     int ratioIndex = optionsCluster->trainRatioCombo->currentIndex();
609     float ratios[] = {0.01f, 0.05f, 0.1f, 0.2f, 1.f/3.f, 0.5f, 0.75f, 1.f};
610     float ratio = ratios[ratioIndex];
611
612     fvec clusterMetrics = ClusterMetrics(samples, labels, clusterScores, ratio);
613
614     optionsCluster->resultList->clear();
615     optionsCluster->resultList->addItem(QString("rss: %1").arg(clusterMetrics[0], 0, 'f', 2));
616     optionsCluster->resultList->addItem(QString("bic: %1").arg(clusterMetrics[1], 0, 'f', 2));
617     optionsCluster->resultList->addItem(QString("aic: %1").arg(clusterMetrics[2], 0, 'f', 2));
618     optionsCluster->resultList->addItem(QString("f1: %1").arg(clusterMetrics[3], 0, 'f', 2));
619     FOR(i, clusterMetrics.size())
620     {
621         optionsCluster->resultList->item(i)->setForeground(i ? SampleColor[i%SampleColorCnt] : Qt::gray);
622     }
623
624
625     // we fill in the canvas sampleColors for the alternative display types
626     canvas->sampleColors.resize(samples.size());
627     FOR(i, samples.size())
628     {
629         fvec res = clusterer->Test(samples[i]);
630         float r=0,g=0,b=0;
631         if(res.size() > 1)
632         {
633             FOR(j, res.size())
634             {
635                 r += SampleColor[(j+1)%SampleColorCnt].red()*res[j];
636                 g += SampleColor[(j+1)%SampleColorCnt].green()*res[j];
637                 b += SampleColor[(j+1)%SampleColorCnt].blue()*res[j];
638             }
639         }
640         else if(res.size())
641         {
642             r = (1-res[0])*255 + res[0]* 255;
643             g = (1-res[0])*255;
644             b = (1-res[0])*255;
645         }
646         canvas->sampleColors[i] = QColor(r,g,b);
647     }
648     canvas->maps.model = QPixmap();
649     canvas->repaint();
650
651         UpdateInfo();
652         drawTimer->clusterer= &this->clusterer;
653         drawTimer->start(QThread::NormalPriority);
654 }
655
656 void MLDemos::ClusterOptimize()
657 {
658     if(!canvas || !canvas->data->GetCount()) return;
659     drawTimer->Stop();
660     drawTimer->Clear();
661     QMutexLocker lock(&mutex);
662     DEL(clusterer);
663     DEL(regressor);
664     DEL(dynamical);
665     DEL(classifier);
666     DEL(maximizer);
667     DEL(projector);
668     lastTrainingInfo = "";
669
670     int tab = optionsCluster->tabWidget->currentIndex();
671     if(tab >= clusterers.size() || !clusterers[tab]) return;
672     clusterer = clusterers[tab]->GetClusterer();
673     tabUsedForTraining = tab;
674
675     int startCount=1, stopCount=11;
676
677     vector<fvec> samples = canvas->data->GetSamples();
678     ivec labels = canvas->data->GetLabels();
679     int ratioIndex = optionsCluster->trainRatioCombo->currentIndex();
680     float ratios[] = {0.01f, 0.05f, 0.1f, 0.2f, 1.f/3.f, 0.5f, 0.75f, 1.f};
681     float ratio = ratios[ratioIndex];
682
683     vector<bool> trainList;
684     if(optionsCluster->manualTrainButton->isChecked())
685     {
686         // we get the list of samples that are checked
687         trainList = GetManualSelection();
688     }
689
690     ivec kCounts;
691     vector<fvec> results(4);
692     for(int k=startCount; k<stopCount; k++)
693     {
694         clusterer->SetNbClusters(k);
695         Train(clusterer, trainList);
696
697         int folds = 10;
698         fvec metricMeans(results.size());
699         ivec foldCount(results.size());
700         FOR(f, folds)
701         {
702             vector<fvec> clusterScores(samples.size());
703             FOR(i, canvas->data->GetCount())
704             {
705                 fvec result = clusterer->Test(samples[i]);
706                 if(clusterer->NbClusters()==1) clusterScores[i] = result;
707                 else if(result.size()>1) clusterScores[i] = result;
708                 else if(result.size())
709                 {
710                     fvec res(clusterer->NbClusters(),0);
711                     res[result[0]] = 1.f;
712                 }
713             }
714             fvec clusterMetrics = ClusterMetrics(samples, labels, clusterScores, ratio);
715             FOR(d, clusterMetrics.size())
716             {
717                 if(clusterMetrics[d] != clusterMetrics[d]) continue;
718                 metricMeans[d] += clusterMetrics[d];
719                 foldCount[d]++;
720             }
721         }
722         FOR(d, metricMeans.size()) metricMeans[d] /= foldCount[d];
723         kCounts.push_back(k);
724         FOR(i, metricMeans.size()) results[i].push_back(metricMeans[i]);
725     }
726
727     int w = optionsCluster->graphLabel->width();
728     int h = optionsCluster->graphLabel->height();
729     int pad = 6;
730     QPixmap pixmap(w,h);
731     QBitmap bitmap(w,h);
732     bitmap.clear();
733     pixmap.setMask(bitmap);
734     pixmap.fill(Qt::transparent);
735     QPainter painter(&pixmap);
736
737     painter.setPen(QPen(Qt::black, 1.f));
738     painter.drawLine(pad, h - 2*pad, w-pad, h-2*pad);
739     painter.drawLine(pad, 0, pad, h-2*pad);
740     QFont font = painter.font();
741     font.setPointSize(9);
742     painter.setFont(font);
743     FOR(k, kCounts.size())
744     {
745         float x = k/(float)(kCounts.size()-1);
746         painter.drawLine(x*(w-2*pad)+pad, h-2*pad-1, x*(w-2*pad)+pad, h-2*pad+1);
747         if(k == kCounts.size()-1) x -= 0.05;
748         painter.drawText(x*(w-2*pad)-2+pad, h-1, QString("%1").arg(kCounts[k]));
749     }
750
751     painter.setRenderHint(QPainter::Antialiasing);
752     fvec mins(results.size(), FLT_MAX), maxes(results.size(), -FLT_MAX);
753     FOR(i, results.size())
754     {
755         FOR(j, results[i].size())
756         {
757             mins[i] = min(mins[i], results[i][j]);
758             maxes[i] = max(maxes[i], results[i][j]);
759         }
760     }
761     vector< pair<float,int> > bests(results.size());
762     FOR(i, results.size())
763     {
764         QPointF old;
765         painter.setPen(QPen(i ? SampleColor[i%SampleColorCnt] : Qt::gray,2));
766         bests[i] = make_pair(FLT_MAX, 0);
767         FOR(k, kCounts.size())
768         {
769             if(results[i][k] < bests[i].first)
770             {
771                 bests[i] = make_pair(results[i][k], kCounts[k]);
772             }
773             float x = k/(float)(kCounts.size()-1);
774             float y = (results[i][k] - mins[i])/(maxes[i]-mins[i]);
775             if(i == 3) y = 1.f - y; // fmeasures needs to be maximized
776             QPointF point(x*(w-2*pad)+pad, (1.f-y)*(h-2*pad));
777             if(k) painter.drawLine(old, point);
778             old = point;
779         }
780     }
781     optionsCluster->graphLabel->setPixmap(pixmap);
782
783     optionsCluster->resultList->clear();
784     optionsCluster->resultList->addItem(QString("rss: %1 (%2)").arg(bests[0].second).arg(bests[0].first, 0, 'f', 2));
785     optionsCluster->resultList->addItem(QString("bic: %1 (%2)").arg(bests[1].second).arg(bests[1].first, 0, 'f', 2));
786     optionsCluster->resultList->addItem(QString("aic: %1 (%2)").arg(bests[2].second).arg(bests[2].first, 0, 'f', 2));
787     optionsCluster->resultList->addItem(QString("f1: %1 (%2)").arg(bests[3].second).arg(-bests[3].first, 0, 'f', 2));
788     FOR(i, results.size())
789     {
790         optionsCluster->resultList->item(i)->setForeground(i ? SampleColor[i%SampleColorCnt] : Qt::gray);
791     }
792
793     int bestIndex = optionsCluster->optimizeCombo->currentIndex();
794     clusterer->SetNbClusters(bests[bestIndex].second);
795     Train(clusterer);
796
797     // we fill in the canvas sampleColors for the alternative display types
798     canvas->sampleColors.resize(samples.size());
799     FOR(i, samples.size())
800     {
801         fvec res = clusterer->Test(samples[i]);
802         float r=0,g=0,b=0;
803         if(res.size() > 1)
804         {
805             FOR(j, res.size())
806             {
807                 r += SampleColor[(j+1)%SampleColorCnt].red()*res[j];
808                 g += SampleColor[(j+1)%SampleColorCnt].green()*res[j];
809                 b += SampleColor[(j+1)%SampleColorCnt].blue()*res[j];
810             }
811         }
812         else if(res.size())
813         {
814             r = (1-res[0])*255 + res[0]* 255;
815             g = (1-res[0])*255;
816             b = (1-res[0])*255;
817         }
818         canvas->sampleColors[i] = QColor(r,g,b);
819     }
820     canvas->maps.model = QPixmap();
821
822     clusterers[tab]->Draw(canvas, clusterer);
823     drawTimer->Clear();
824     UpdateInfo();
825     drawTimer->clusterer= &this->clusterer;
826     drawTimer->start(QThread::NormalPriority);
827     canvas->repaint();
828
829 }
830
831 void MLDemos::ClusterIterate()
832 {
833     if(!canvas || !canvas->data->GetCount()) return;
834     drawTimer->Stop();
835     int tab = optionsCluster->tabWidget->currentIndex();
836     if(tab >= clusterers.size() || !clusterers[tab]) return;
837     QMutexLocker lock(&mutex);
838     if(!clusterer)
839     {
840         clusterer = clusterers[tab]->GetClusterer();
841         tabUsedForTraining = tab;
842     }
843     else clusterers[tab]->SetParams(clusterer);
844     clusterer->SetIterative(true);
845     Train(clusterer);
846     clusterers[tab]->Draw(canvas,clusterer);
847
848     // we fill in the canvas sampleColors
849     vector<fvec> samples = canvas->data->GetSamples();
850     canvas->sampleColors.resize(samples.size());
851     FOR(i, samples.size())
852     {
853         fvec res = clusterer->Test(samples[i]);
854         float r=0,g=0,b=0;
855         if(res.size() > 1)
856         {
857             FOR(j, res.size())
858             {
859                 r += SampleColor[(j+1)%SampleColorCnt].red()*res[j];
860                 g += SampleColor[(j+1)%SampleColorCnt].green()*res[j];
861                 b += SampleColor[(j+1)%SampleColorCnt].blue()*res[j];
862             }
863         }
864         else if(res.size())
865         {
866             r = (1-res[0])*255 + res[0]* 255;
867             g = (1-res[0])*255;
868             b = (1-res[0])*255;
869         }
870         canvas->sampleColors[i] = QColor(r,g,b);
871     }
872     canvas->maps.model = QPixmap();
873     canvas->repaint();
874
875     UpdateInfo();
876 }
877
878 void MLDemos::Maximize()
879 {
880         if(!canvas) return;
881         if(canvas->maps.reward.isNull()) return;
882         QMutexLocker lock(&mutex);
883         drawTimer->Stop();
884         DEL(clusterer);
885         DEL(regressor);
886         DEL(dynamical);
887         DEL(classifier);
888         DEL(maximizer);
889     DEL(projector);
890     lastTrainingInfo = "";
891     int tab = optionsMaximize->tabWidget->currentIndex();
892         if(tab >= maximizers.size() || !maximizers[tab]) return;
893         maximizer = maximizers[tab]->GetMaximizer();
894         maximizer->maxAge = optionsMaximize->iterationsSpin->value();
895         maximizer->stopValue = optionsMaximize->stoppingSpin->value();
896         tabUsedForTraining = tab;
897         Train(maximizer);
898
899         UpdateInfo();
900         drawTimer->Stop();
901         drawTimer->Clear();
902         drawTimer->start(QThread::NormalPriority);
903 }
904
905 void MLDemos::MaximizeContinue()
906 {
907         if(!canvas || !maximizer) return;
908         QMutexLocker lock(&mutex);
909         if(drawTimer)
910         {
911                 drawTimer->Stop();
912         }
913         maximizer->SetConverged(!maximizer->hasConverged());
914
915         UpdateInfo();
916         if(drawTimer)
917         {
918                 drawTimer->start(QThread::NormalPriority);
919         }
920 }
921
922 void MLDemos::Project()
923 {
924     if(!canvas) return;
925     QMutexLocker lock(&mutex);
926     drawTimer->Stop();
927     drawTimer->Clear();
928     DEL(clusterer);
929     DEL(regressor);
930     DEL(dynamical);
931     DEL(classifier);
932     DEL(maximizer);
933     DEL(projector);
934     lastTrainingInfo = "";
935     int tab = optionsProject->tabWidget->currentIndex();
936     if(tab >= projectors.size() || !projectors[tab]) return;
937     projector = projectors[tab]->GetProjector();
938     projectors[tab]->SetParams(projector);
939     tabUsedForTraining = tab;
940     bool bHasSource = false;
941     if(sourceData.size() && sourceData.size() == canvas->data->GetCount())
942     {
943         bHasSource = true;
944         canvas->data->SetSamples(sourceData);
945         canvas->data->SetLabels(sourceLabels);
946     }
947     vector<bool> trainList;
948     if(optionsProject->manualTrainButton->isChecked())
949     {
950         // we get the list of samples that are checked
951         trainList = GetManualSelection();
952     }
953     Train(projector, trainList);
954     if(!bHasSource)
955     {
956         sourceData = canvas->data->GetSamples();
957         sourceLabels = canvas->data->GetLabels();
958     }
959     projectedData = projector->GetProjected();
960     if(projectedData.size())
961     {
962         canvas->data->SetSamples(projectedData);
963         canvas->data->bProjected = true;
964     }
965     //canvas->FitToData();
966     CanvasTypeChanged();
967     CanvasOptionsChanged();
968     if(!canvas->canvasType)
969     {
970         projectors[tab]->Draw(canvas, projector);
971     }
972     canvas->repaint();
973     UpdateInfo();
974 }
975
976 void MLDemos::ProjectRevert()
977 {
978     QMutexLocker lock(&mutex);
979     drawTimer->Stop();
980     drawTimer->Clear();
981     DEL(clusterer);
982     DEL(regressor);
983     DEL(dynamical);
984     DEL(classifier);
985     DEL(maximizer);
986     DEL(projector);
987     lastTrainingInfo = "";
988     if(!sourceData.size()) return;
989     canvas->data->SetSamples(sourceData);
990     canvas->data->SetLabels(sourceLabels);
991     canvas->data->bProjected = false;
992     canvas->maps.info = QPixmap();
993     canvas->maps.model = QPixmap();
994     canvas->FitToData();
995     CanvasTypeChanged();
996     CanvasOptionsChanged();
997     canvas->repaint();
998     UpdateInfo();
999     sourceData.clear();
1000     sourceLabels.clear();
1001 }
1002
1003 void MLDemos::ProjectReproject()
1004 {
1005     if(!canvas) return;
1006     mutex.lock();
1007     sourceData = canvas->data->GetSamples();
1008     sourceLabels = canvas->data->GetLabels();
1009     mutex.unlock();
1010     Project();
1011 }
1012
1013 void MLDemos::ExportOutput()
1014 {
1015     if(!classifier && !regressor && !clusterer && !dynamical && !maximizer) return;
1016     // get a file
1017 }
1018
1019 void MLDemos::ExportAnimation()
1020 {
1021     if(!canvas->data->GetSamples().size()) return;
1022 }
1023
1024
1025 void MLDemos::UpdateLearnedModel()
1026 {
1027     if(!canvas) return;
1028     if(!clusterer && !regressor && !dynamical && !classifier && !projector) return;
1029     if(classifier)
1030     {
1031         QMutexLocker lock(&mutex);
1032         classifiers[tabUsedForTraining]->Draw(canvas, classifier);
1033         if(classifier->UsesDrawTimer() && !drawTimer->isRunning())
1034         {
1035             drawTimer->start(QThread::NormalPriority);
1036         }
1037     }
1038     if(clusterer)
1039     {
1040         QMutexLocker lock(&mutex);
1041         clusterers[tabUsedForTraining]->Draw(canvas, clusterer);
1042     }
1043     if(regressor)
1044     {
1045         QMutexLocker lock(&mutex);
1046         regressors[tabUsedForTraining]->Draw(canvas, regressor);
1047         // here we draw the errors for each sample
1048         int outputDim = optionsRegress->outputDimCombo->currentIndex();
1049         ivec inputDims = GetInputDimensions();
1050         //ivec inputDims = optionsRegress->inputDimButton->isChecked() ? GetInputDimensions() : ivec();
1051          if(inputDims.size()==1 && inputDims[0] == outputDim) return;
1052
1053         int outputIndexInList = -1;
1054         FOR(i, inputDims.size()) if(outputDim == inputDims[i])
1055         {
1056             outputIndexInList = i;
1057             break;
1058         }
1059         if(canvas->data->GetDimCount() > 2 && canvas->canvasType == 0)
1060         {
1061             vector<fvec> samples = canvas->data->GetSamples();
1062             vector<fvec> subsamples = canvas->data->GetSampleDims(inputDims, outputIndexInList==-1 ? outputDim : -1);
1063             ivec labels = canvas->data->GetLabels();
1064             QPainter painter(&canvas->maps.model);
1065             painter.setRenderHint(QPainter::Antialiasing);
1066             // we draw the starting sample
1067             painter.setOpacity(0.4);
1068             painter.setPen(Qt::black);
1069             painter.setBrush(Qt::white);
1070             FOR(i, samples.size())
1071             {
1072                 fvec sample = samples[i];
1073                 QPointF point = canvas->toCanvasCoords(sample);
1074                 painter.drawEllipse(point, 6,6);
1075             }
1076             // we draw the estimated sample
1077             painter.setPen(Qt::white);
1078             painter.setBrush(Qt::black);
1079             FOR(i, samples.size())
1080             {
1081                 fvec sample = samples[i];
1082                 fvec estimate = regressor->Test(subsamples[i]);
1083                 sample[outputDim] = estimate[0];
1084                 QPointF point2 = canvas->toCanvasCoords(sample);
1085                 painter.drawEllipse(point2, 5,5);
1086             }
1087             painter.setOpacity(1);
1088             // we draw the error bars
1089             FOR(i, samples.size())
1090             {
1091                 fvec sample = samples[i];
1092                 fvec estimate = regressor->Test(subsamples[i]);
1093                 QPointF point = canvas->toCanvasCoords(sample);
1094                 sample[outputDim] = estimate[0];
1095                 QPointF point2 = canvas->toCanvasCoords(sample);
1096                 QColor color = SampleColor[labels[i]%SampleColorCnt];
1097                 if(!labels[i]) color = Qt::black;
1098                 painter.setPen(QPen(color, 1));
1099                 painter.drawLine(point, point2);
1100             }
1101         }
1102     }
1103     if(dynamical)
1104     {
1105         QMutexLocker lock(&mutex);
1106         dynamicals[tabUsedForTraining]->Draw(canvas, dynamical);
1107         int w = canvas->width(), h = canvas->height();
1108
1109         int resampleType = optionsDynamic->resampleCombo->currentIndex();
1110         int resampleCount = optionsDynamic->resampleSpin->value();
1111         int centerType = optionsDynamic->centerCombo->currentIndex();
1112         float dT = optionsDynamic->dtSpin->value();
1113         int zeroEnding = optionsDynamic->zeroCheck->isChecked();
1114         bool bColorMap = optionsDynamic->colorCheck->isChecked();
1115
1116         // we draw the current trajectories
1117         vector< vector<fvec> > trajectories = canvas->data->GetTrajectories(resampleType, resampleCount, centerType, dT, zeroEnding);
1118         vector< vector<fvec> > testTrajectories;
1119         int steps = 300;
1120         if(trajectories.size())
1121         {
1122             testTrajectories.resize(trajectories.size());
1123             int dim = trajectories[0][0].size() / 2;
1124             FOR(i, trajectories.size())
1125             {
1126                 fvec start(dim,0);
1127                 FOR(d, dim) start[d] = trajectories[i][0][d];
1128                 vector<fvec> result = dynamical->Test(start, steps);
1129                 testTrajectories[i] = result;
1130             }
1131             canvas->maps.model = QPixmap(w,h);
1132             QBitmap bitmap(w,h);
1133             bitmap.clear();
1134             canvas->maps.model.setMask(bitmap);
1135             canvas->maps.model.fill(Qt::transparent);
1136
1137             if(canvas->canvasType == 0) // standard canvas
1138             {
1139                 QPainter painter(&canvas->maps.model);
1140                 painter.setRenderHint(QPainter::Antialiasing);
1141                 FOR(i, testTrajectories.size())
1142                 {
1143                     vector<fvec> &result = testTrajectories[i];
1144                     fvec oldPt = result[0];
1145                     int count = result.size();
1146                     FOR(j, count-1)
1147                     {
1148                         fvec pt = result[j+1];
1149                         painter.setPen(QPen(Qt::green, 2));
1150                         painter.drawLine(canvas->toCanvasCoords(pt), canvas->toCanvasCoords(oldPt));
1151                         oldPt = pt;
1152                     }
1153                     painter.setBrush(Qt::NoBrush);
1154                     painter.setPen(Qt::green);
1155                     painter.drawEllipse(canvas->toCanvasCoords(result[0]), 5, 5);
1156                     painter.setPen(Qt::red);
1157                     painter.drawEllipse(canvas->toCanvasCoords(result[count-1]), 5, 5);
1158                 }
1159             }
1160             else
1161             {
1162                 pair<fvec,fvec> bounds = canvas->data->GetBounds();
1163                 Expose::DrawTrajectories(canvas->maps.model, testTrajectories, vector<QColor>(), canvas->canvasType-1, 1, bounds);
1164             }
1165         }
1166
1167         // the first index is "none", so we subtract 1
1168         int avoidIndex = optionsDynamic->obstacleCombo->currentIndex()-1;
1169         if(avoidIndex >=0 && avoidIndex < avoiders.size() && avoiders[avoidIndex])
1170         {
1171             DEL(dynamical->avoid);
1172             dynamical->avoid = avoiders[avoidIndex]->GetObstacleAvoidance();
1173         }
1174         UpdateInfo();
1175         if(dynamicals[tabUsedForTraining]->UsesDrawTimer())
1176         {
1177             drawTimer->bColorMap = bColorMap;
1178             drawTimer->start(QThread::NormalPriority);
1179         }
1180
1181     }
1182     if(projector)
1183     {
1184         projectors[tabUsedForTraining]->Draw(canvas, projector);
1185     }
1186     UpdateInfo();
1187 }