Fixed the display problem related to resizing the windows on mac.
[mldemos:auto-amcs-auto-amcs-mldemos.git] / MLDemos / mlprocessing.cpp
1 /*********************************************************************
2 MLDemos: A User-Friendly visualization toolkit for machine learning
3 Copyright (C) 2010  Basilio Noris
4 Contact: mldemos@b4silio.com
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free
18 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *********************************************************************/
20 #include "mldemos.h"
21 #include "basicMath.h"
22 #include "basicOpenCV.h"
23 #include "classifier.h"
24 #include "regressor.h"
25 #include "dynamical.h"
26 #include "clusterer.h"
27 #include "roc.h"
28 #include <QDebug>
29 #include <fstream>
30 #include <QPixmap>
31 #include <QBitmap>
32 #include <QSettings>
33 #include <QFileDialog>
34
35 using namespace std;
36
37 void MLDemos::Classify()
38 {
39     if(!canvas || !canvas->data->GetCount()) return;
40     drawTimer->Stop();
41     mutex.lock();
42     DEL(clusterer);
43     DEL(regressor);
44     DEL(dynamical);
45     DEL(classifier);
46     int tab = optionsClassify->tabWidget->currentIndex();
47     if(tab >= classifiers.size() || !classifiers[tab]) return;
48     classifier = classifiers[tab]->GetClassifier();
49     tabUsedForTraining = tab;
50     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
51     int ratioIndex = optionsClassify->traintestRatioCombo->currentIndex();
52     float trainRatio = ratios[ratioIndex];
53     int positive = optionsClassify->positiveSpin->value();
54
55     if(drawTimer)
56     {
57         drawTimer->Stop();
58         drawTimer->Clear();
59     }
60     bool trained = Train(classifier, positive, trainRatio);
61     if(trained)
62     {
63         classifiers[tab]->Draw(canvas, classifier);
64         if(drawTimer && classifier->UsesDrawTimer())
65         {
66             drawTimer->classifier = &this->classifier;
67             drawTimer->start(QThread::NormalPriority);
68         }
69     }
70     else
71     {
72         mutex.unlock();
73         Clear();
74     }
75     UpdateInfo();
76     mutex.unlock();
77 }
78
79
80 void MLDemos::ClassifyCross()
81 {
82     if(!canvas || !canvas->data->GetCount()) return;
83     drawTimer->Stop();
84     QMutexLocker lock(&mutex);
85     DEL(clusterer);
86     DEL(regressor);
87     DEL(dynamical);
88     DEL(classifier);
89     int tab = optionsClassify->tabWidget->currentIndex();
90     if(tab >= classifiers.size() || !classifiers[tab]) return;
91     tabUsedForTraining = tab;
92
93     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
94     int ratioIndex = optionsClassify->traintestRatioCombo->currentIndex();
95     float trainRatio = ratios[ratioIndex];
96     int positive = optionsClassify->positiveSpin->value();
97     int foldCount = optionsClassify->foldCountSpin->value();
98
99     vector<fvec> fmeasures;
100     fmeasures.resize(2);
101     bool trained = false;
102     FOR(f,foldCount)
103     {
104         DEL(classifier);
105         classifier = classifiers[tab]->GetClassifier();
106         trained = Train(classifier, positive, trainRatio);
107         if(!trained) break;
108         if(classifier->rocdata.size()>0)
109         {
110             fmeasures[0].push_back(GetBestFMeasure(classifier->rocdata[0]));
111         }
112         if(classifier->rocdata.size()>1)
113         {
114             fmeasures[1].push_back(GetBestFMeasure(classifier->rocdata[1]));
115         }
116     }
117     classifier->crossval = fmeasures;
118     ShowCross();
119     if(trained) classifiers[tab]->Draw(canvas, classifier);
120     UpdateInfo();
121 }
122
123 void MLDemos::Regression()
124 {
125     if(!canvas || !canvas->data->GetCount()) return;
126     drawTimer->Stop();
127     QMutexLocker lock(&mutex);
128     DEL(clusterer);
129     DEL(regressor);
130     DEL(dynamical);
131     DEL(classifier);
132     int tab = optionsRegress->tabWidget->currentIndex();
133     if(tab >= regressors.size() || !regressors[tab]) return;
134     regressor = regressors[tab]->GetRegressor();
135     tabUsedForTraining = tab;
136
137     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
138     int ratioIndex = optionsRegress->traintestRatioCombo->currentIndex();
139     float trainRatio = ratios[ratioIndex];
140
141     if(drawTimer)
142     {
143         drawTimer->Stop();
144         drawTimer->Clear();
145     }
146     Train(regressor, trainRatio);
147     regressors[tab]->Draw(canvas, regressor);
148     UpdateInfo();
149 }
150
151 void MLDemos::RegressionCross()
152 {
153     if(!canvas || !canvas->data->GetCount()) return;
154     drawTimer->Stop();
155     QMutexLocker lock(&mutex);
156     DEL(clusterer);
157     DEL(regressor);
158     DEL(dynamical);
159     DEL(classifier);
160     int tab = optionsRegress->tabWidget->currentIndex();
161     if(tab >= regressors.size() || !regressors[tab]) return;
162     regressor = regressors[tab]->GetRegressor();
163     tabUsedForTraining = tab;
164
165     float ratios [] = {.1f,.25f,1.f/3.f,.5f,2.f/3.f,.75f,.9f,1.f};
166     int ratioIndex = optionsRegress->traintestRatioCombo->currentIndex();
167     float trainRatio = ratios[ratioIndex];
168     int foldCount = optionsRegress->foldCountSpin->value();
169
170     vector<fvec> errors;
171     errors.resize(2);
172     bool trained = false;
173     FOR(f,foldCount)
174     {
175         DEL(regressor);
176         regressor = regressors[tab]->GetRegressor();
177         Train(regressor, trainRatio);
178         if(regressor->trainErrors.size())
179         {
180             errors[0] = regressor->trainErrors;
181         }
182         if(regressor->testErrors.size())
183         {
184             errors[1] = regressor->testErrors;
185         }
186     }
187     regressor->crossval = errors;
188     ShowCross();
189     if(drawTimer)
190     {
191         drawTimer->Stop();
192         drawTimer->Clear();
193     }
194     Train(regressor, trainRatio);
195     regressors[tab]->Draw(canvas, regressor);
196     UpdateInfo();
197 }
198
199 void MLDemos::Dynamize()
200 {
201     if(!canvas || !canvas->data->GetCount() || !canvas->data->GetSequences().size()) return;
202     drawTimer->Stop();
203     QMutexLocker lock(&mutex);
204     DEL(clusterer);
205     DEL(regressor);
206     DEL(dynamical);
207     DEL(classifier);
208     int tab = optionsDynamic->tabWidget->currentIndex();
209     if(tab >= dynamicals.size() || !dynamicals[tab]) return;
210     dynamical = dynamicals[tab]->GetDynamical();
211     tabUsedForTraining = tab;
212
213     if(drawTimer)
214     {
215         drawTimer->Stop();
216         drawTimer->Clear();
217     }
218     Train(dynamical);
219     dynamicals[tab]->Draw(canvas,dynamical);
220
221     // the first index is "none", so we subtract 1
222     int avoidIndex = optionsDynamic->obstacleCombo->currentIndex()-1;
223     if(avoidIndex >=0 && avoidIndex < avoiders.size() && avoiders[avoidIndex])
224     {
225         DEL(dynamical->avoid);
226                 dynamical->avoid = avoiders[avoidIndex]->GetObstacleAvoidance();
227     }
228
229     if(drawTimer)
230     {
231         if(dynamicals[tab]->UsesDrawTimer())
232             drawTimer->start(QThread::NormalPriority);
233     }
234     UpdateInfo();
235 }
236
237 void MLDemos::Avoidance()
238 {
239     if(!canvas || !dynamical) return;
240     drawTimer->Stop();
241     QMutexLocker lock(&mutex);
242     int index = optionsDynamic->obstacleCombo->currentIndex()-1;
243     if(index >=0 && index >= avoiders.size() || !avoiders[index]) return;
244     DEL(dynamical->avoid);
245     dynamical->avoid = avoiders[index]->GetObstacleAvoidance();
246     drawTimer->Clear();
247     drawTimer->start(QThread::NormalPriority);
248 }
249
250 void MLDemos::Cluster()
251 {
252     if(!canvas || !canvas->data->GetCount()) return;
253     drawTimer->Stop();
254     QMutexLocker lock(&mutex);
255     DEL(clusterer);
256     DEL(regressor);
257     DEL(dynamical);
258     DEL(classifier);
259     int tab = optionsCluster->tabWidget->currentIndex();
260     if(tab >= clusterers.size() || !clusterers[tab]) return;
261     clusterer = clusterers[tab]->GetClusterer();
262     tabUsedForTraining = tab;
263     Train(clusterer);
264     if(drawTimer)
265     {
266         drawTimer->Stop();
267         drawTimer->Clear();
268     }
269     clusterers[tab]->Draw(canvas,clusterer);
270     UpdateInfo();
271 }
272
273 void MLDemos::ClusterIterate()
274 {
275     if(!canvas || !canvas->data->GetCount()) return;
276     drawTimer->Stop();
277     int tab = optionsCluster->tabWidget->currentIndex();
278     if(tab >= clusterers.size() || !clusterers[tab]) return;
279     QMutexLocker lock(&mutex);
280     if(!clusterer)
281     {
282         clusterer = clusterers[tab]->GetClusterer();
283         tabUsedForTraining = tab;
284     }
285     else clusterers[tab]->SetParams(clusterer);
286     clusterer->SetIterative(true);
287     Train(clusterer);
288     clusterers[tab]->Draw(canvas,clusterer);
289     UpdateInfo();
290 }
291
292 bool MLDemos::Train(Classifier *classifier, int positive, float trainRatio)
293 {
294     if(!classifier) return false;
295     ivec labels = canvas->data->GetLabels();
296     ivec newLabels;
297     newLabels.resize(labels.size(), 1);
298     if(positive == 0)
299     {
300         FOR(i, labels.size()) newLabels[i] = (!labels[i] || labels[i] == -1) ? 1 : -1;
301     }
302     else
303     {
304         FOR(i, labels.size()) newLabels[i] = (labels[i] == positive) ? 1 : -1;
305     }
306     bool bHasPositive = false, bHasNegative = false;
307     FOR(i, newLabels.size())
308     {
309         if(bHasPositive && bHasNegative) break;
310         bHasPositive |= newLabels[i] == 1;
311         bHasNegative |= newLabels[i] == -1;
312     }
313     if((!bHasPositive || !bHasNegative) && !classifier->SingleClass()) return false;
314
315     classifier->rocdata.clear();
316     classifier->roclabels.clear();
317
318     vector<fvec> samples = canvas->data->GetSamples();
319     if(trainRatio == 1)
320     {
321         classifier->Train(samples, newLabels);
322         // we generate the roc curve for this guy
323         vector<f32pair> rocData;
324         FOR(i, samples.size())
325         {
326             float resp = classifier->Test(samples[i]);
327             rocData.push_back(f32pair(resp, newLabels[i]));
328         }
329         classifier->rocdata.push_back(rocData);
330         classifier->roclabels.push_back("training");
331     }
332     else
333     {
334         int trainCnt = (int)(samples.size()*trainRatio);
335         u32 *perm = randPerm(samples.size());
336         vector<fvec> trainSamples;
337         ivec trainLabels;
338         trainSamples.resize(trainCnt);
339         trainLabels.resize(trainCnt);
340         FOR(i, trainCnt)
341         {
342             trainSamples[i] = samples[perm[i]];
343             trainLabels[i] = newLabels[perm[i]];
344         }
345         classifier->Train(trainSamples, trainLabels);
346
347         // we generate the roc curve for this guy
348         vector<f32pair> rocData;
349         FOR(i, trainCnt)
350         {
351             float resp = classifier->Test(samples[perm[i]]);
352             rocData.push_back(f32pair(resp, newLabels[perm[i]]));
353         }
354         classifier->rocdata.push_back(rocData);
355         classifier->roclabels.push_back("training");
356         rocData.clear();
357         for(int i=trainCnt; i<samples.size(); i++)
358         {
359             float resp = classifier->Test(samples[perm[i]]);
360             rocData.push_back(f32pair(resp, newLabels[perm[i]]));
361         }
362         classifier->rocdata.push_back(rocData);
363         classifier->roclabels.push_back("test");
364         KILL(perm);
365     }
366     bIsRocNew = true;
367     bIsCrossNew = true;
368     SetROCInfo();
369     return true;
370 }
371
372 void MLDemos::Train(Regressor *regressor, float trainRatio)
373 {
374     if(!regressor) return;
375     vector<fvec> samples = canvas->data->GetSamples();
376     ivec labels = canvas->data->GetLabels();
377     fvec trainErrors, testErrors;
378     if(trainRatio == 1.f)
379     {
380         regressor->Train(samples, labels);
381         trainErrors.clear();
382         FOR(i, samples.size())
383         {
384             fvec sample = samples[i];
385             int dim = sample.size();
386             fvec res = regressor->Test(sample);
387             float error = fabs(res[0] - sample[dim-1]);
388             trainErrors.push_back(error);
389         }
390         regressor->trainErrors = trainErrors;
391         regressor->testErrors.clear();
392     }
393     else
394     {
395         int trainCnt = (int)(samples.size()*trainRatio);
396         u32 *perm = randPerm(samples.size());
397         vector<fvec> trainSamples;
398         ivec trainLabels;
399         trainSamples.resize(trainCnt);
400         trainLabels.resize(trainCnt);
401         FOR(i, trainCnt)
402         {
403             trainSamples[i] = samples[perm[i]];
404             trainLabels[i] = labels[perm[i]];
405         }
406         regressor->Train(trainSamples, trainLabels);
407
408         FOR(i, trainCnt)
409         {
410             fvec sample = samples[perm[i]];
411             int dim = sample.size();
412             fvec res = regressor->Test(sample);
413             float error = fabs(res[0] - sample[dim-1]);
414             trainErrors.push_back(error);
415         }
416         for(int i=trainCnt; i<samples.size(); i++)
417         {
418             fvec sample = samples[perm[i]];
419             int dim = sample.size();
420             fvec res = regressor->Test(sample);
421             float error = fabs(res[0] - sample[dim-1]);
422             testErrors.push_back(error);
423         }
424         regressor->trainErrors = trainErrors;
425         regressor->testErrors = testErrors;
426         KILL(perm);
427     }
428     bIsCrossNew = true;
429 }
430
431 void MLDemos::Train(Dynamical *dynamical)
432 {
433     if(!dynamical) return;
434     vector<fvec> samples = canvas->data->GetSamples();
435     vector<ipair> sequences = canvas->data->GetSequences();
436     ivec labels = canvas->data->GetLabels();
437     if(!samples.size() || !sequences.size()) return;
438     int dim = samples[0].size();
439     int count = optionsDynamic->resampleSpin->value();
440     int resampleType = optionsDynamic->resampleCombo->currentIndex();
441     int centerType = optionsDynamic->centerCombo->currentIndex();
442     bool zeroEnding = optionsDynamic->zeroCheck->isChecked();
443
444     // we split the data into trajectories
445     vector< vector<fvec> > trajectories;
446     ivec trajLabels;
447     trajectories.resize(sequences.size());
448     trajLabels.resize(sequences.size());
449     FOR(i, sequences.size())
450     {
451         int length = sequences[i].second-sequences[i].first+1;
452         trajLabels[i] = canvas->data->GetLabel(sequences[i].first);
453         trajectories[i].resize(length);
454         FOR(j, length)
455         {
456             trajectories[i][j].resize(dim*2);
457             // copy data
458             FOR(d, dim) trajectories[i][j][d] = samples[sequences[i].first + j][d];
459         }
460     }
461
462     switch(resampleType)
463     {
464     case 0: // none
465     {
466         FOR(i,sequences.size())
467         {
468             int cnt = sequences[i].second-sequences[i].first+1;
469             if(count > cnt) count = cnt;
470         }
471         FOR(i, trajectories.size())
472         {
473             while(trajectories[i].size() > count) trajectories[i].pop_back();
474         }
475     }
476         break;
477     case 1: // uniform
478     {
479         FOR(i, trajectories.size())
480         {
481             vector<fvec> trajectory = trajectories[i];
482             trajectories[i] = interpolate(trajectory, count);
483         }
484     }
485         break;
486     }
487
488
489     if(centerType)
490     {
491         map<int,int> counts;
492         map<int,fvec> centers;
493         FOR(i, sequences.size())
494         {
495             int index = centerType ? sequences[i].second : sequences[i].first; // start
496             int label = canvas->data->GetLabel(index);
497             if(!centers.count(label))
498             {
499                 fvec center;
500                 center.resize(2,0);
501                 centers[label] = center;
502                 counts[label] = 0;
503             }
504             centers[label] += samples[index];
505             counts[label]++;
506         }
507         for(map<int,int>::iterator p = counts.begin(); p!=counts.end(); ++p)
508         {
509             int label = p->first;
510             centers[label] /= p->second;
511         }
512         FOR(i, trajectories.size())
513         {
514             fvec difference = centers[trajLabels[i]] - trajectories[i][count-1];
515             FOR(j, count) trajectories[i][j] += difference;
516         }
517     }
518
519     //float dT = 10.f; // time span between each data frame
520     float dT = optionsDynamic->dtSpin->value();
521     dynamical->dT = dT;
522     //dT = 10.f;
523
524     float maxV = -FLT_MAX;
525     // we compute the velocity
526     FOR(i, trajectories.size())
527     {
528         FOR(j, count-1)
529         {
530             FOR(d, dim)
531             {
532                 float velocity = (trajectories[i][j+1][d] - trajectories[i][j][d]) / dT;
533                 trajectories[i][j][dim + d] = velocity;
534                 if(velocity > maxV) maxV = velocity;
535             }
536         }
537         if(!zeroEnding)
538         {
539             FOR(d, dim)
540             {
541                 trajectories[i][count-1][dim + d] = trajectories[i][count-2][dim + d];
542             }
543         }
544     }
545
546     // we normalize the velocities as the variance of the data
547     fvec mean, sigma;
548     mean.resize(dim,0);
549     int cnt = 0;
550     sigma.resize(dim,0);
551     FOR(i, trajectories.size())
552     {
553         FOR(j, count)
554         {
555             mean += trajectories[i][j];
556             cnt++;
557         }
558     }
559     mean /= cnt;
560     FOR(i, trajectories.size())
561     {
562         FOR(j, count)
563         {
564             fvec diff = (mean - trajectories[i][j]);
565             FOR(d,dim) sigma[d] += diff[d]*diff[d];
566         }
567     }
568     sigma /= cnt;
569
570     FOR(i, trajectories.size())
571     {
572         FOR(j, count)
573         {
574             FOR(d, dim)
575             {
576                 trajectories[i][j][dim + d] /= maxV;
577                 //trajectories[i][j][dim + d] /= sqrt(sigma[d]);
578             }
579         }
580     }
581
582     dynamical->Train(trajectories, labels);
583 }
584
585 void MLDemos::Train(Clusterer *clusterer)
586 {
587     if(!clusterer) return;
588     clusterer->Train(canvas->data->GetSamples());
589 }
590
591 void MLDemos::ExportOutput()
592 {
593     if(!classifier && !regressor && !clusterer) return;
594     // get a file
595 }
596
597 void MLDemos::ExportAnimation()
598 {
599     if(!canvas->data->GetSamples().size()) return;
600 }
601