Fixed a display problem related to resizing windows on Mac.
_AlgorithmsPlugins / LinearMethods / classifierBoost.cpp
/*********************************************************************
MLDemos: A User-Friendly visualization toolkit for machine learning
Copyright (C) 2010  Basilio Noris
Contact: mldemos@b4silio.com

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*********************************************************************/
#include "public.h"
#include "basicMath.h"
#include "classifierBoost.h"

using namespace std;

ClassifierBoost::ClassifierBoost()
: model(0), weakCount(0), scoreMultiplier(1.f)
{
    bSingleClass = false;
    type = CLASS_BOOST;
}

ClassifierBoost::~ClassifierBoost()
{
    if(model) model->clear();
    DEL(model);
}

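// pool of randomly generated weak learners, shared by Train() and Test(),
// rebuilt only when the weak learner type changes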
vector<fvec> learners;
int currentLearnerType = -1;

void ClassifierBoost::Train( std::vector< fvec > samples, ivec labels )
{
    if(model) model->clear();
    u32 sampleCnt = samples.size();
    if(!sampleCnt) return;
    DEL(model);
    dim = samples[0].size();
    u32 *perm = randPerm(sampleCnt);

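    // size of the weak learner pool: 360 fixed directions for random projections,
    // 1000 random candidates otherwise (or weakCount, whichever is larger)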
    int learnerCount = max((!weakType ? 360 : 1000), (int)weakCount);
    if(currentLearnerType != weakType)
    {
        srand(1); // so we always generate the same weak learners
        learners.clear();
        learners.resize(learnerCount);
        // we generate a bunch of random directions as learners
        // srand(1);
        if(weakType != 1) // random projection
        {
            if(dim==2)
            {
                FOR(i, learnerCount)
                {
                    learners[i].resize(dim);
                    if(!weakType)
                    {
                        float theta = i / (float)learnerCount * PIf;
                        // float theta = rand()/(float)RAND_MAX*PIf;
                        learners[i][0] = cosf(theta);
                        learners[i][1] = sinf(theta);
                    }
                    else
                    {
                        learners[i][0] = rand()/(float)RAND_MAX;
                        learners[i][1] = rand()/(float)RAND_MAX;
                    }
                }
            }
            else
            {
                FOR(i, learnerCount)
                {
                    learners[i].resize(dim);
                    if(!weakType)
                    {
                        FOR(d, dim) learners[i][d] = rand()/(float)RAND_MAX*2.f - 1.f;
                    }
                    else
                    {
                        FOR(d, dim) learners[i][d] = rand()/(float)RAND_MAX;
                    }
                }
            }
        }
        else // random rectangle
        {
            // we need to find the boundaries of the data
            float *xMin = new float[dim];
            float *xMax = new float[dim];
            FOR(d, dim)
            {
                xMin[d] = FLT_MAX;
                xMax[d] = -FLT_MAX;
            }
            FOR(i, samples.size())
            {
                FOR(d, dim)
                {
                    if(xMin[d] > samples[i][d]) xMin[d] = samples[i][d];
                    if(xMax[d] < samples[i][d]) xMax[d] = samples[i][d];
                }
            }

            FOR(i, learnerCount)
            {
                learners[i].resize(dim*2);
                FOR(d, dim)
                {
                    float x = (rand() / (float)RAND_MAX)*(xMax[d] - xMin[d]) + xMin[d]; // starting point
                    float l = (rand() / (float)RAND_MAX)*(xMax[d] - xMin[d]); // width
                    learners[i][2*d] = x;
                    learners[i][2*d+1] = l;
                }
            }
            delete [] xMin;
            delete [] xMax;
        }
        currentLearnerType = weakType;
    }

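    // build the boosting feature matrix: one row per (shuffled) training sample,
    // one column per weak learner, with labels and uniform sample weights alongside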
    CvMat *trainSamples = cvCreateMat(sampleCnt, learnerCount, CV_32FC1);
    CvMat *trainLabels = cvCreateMat(labels.size(), 1, CV_32FC1);
    CvMat *sampleWeights = cvCreateMat(samples.size(), 1, CV_32FC1);

    if(weakType != 1)
    {
        if(dim == 2)
        {
            FOR(i, sampleCnt)
            {
                FOR(j, learnerCount)
                {
                    float val = 0;
                    if(!weakType)
                    {
                        val = samples[perm[i]][0]*learners[j][0] + samples[perm[i]][1]*learners[j][1];
                    }
                    else
                    {
                        val = sqrtf((samples[perm[i]][0] - learners[j][0])*(samples[perm[i]][0] - learners[j][0])+
                            (samples[perm[i]][1] - learners[j][1])*(samples[perm[i]][1] - learners[j][1]));
                    }
                    cvSetReal2D(trainSamples, i, j, val);
                }
                cvSet1D(trainLabels, i, cvScalar((float)labels[perm[i]]));
                cvSet1D(sampleWeights, i, cvScalar(1));
            }

        }
        else
        {
            FOR(i, sampleCnt)
            {
                // project the sample in the direction of the learner
                fvec sample = samples[perm[i]];
                FOR(j, learnerCount)
                {
                    float val = 0;
                    if(!weakType) // random projection: dot product with the learner direction
                    {
                        FOR(d, dim) val += sample[d]*learners[j][d];
                    }
                    else // random circle: distance to the learner center
                    {
                        FOR(d, dim) val += (sample[d] - learners[j][d])*(sample[d] - learners[j][d]);
                        val = sqrtf(val);
                    }
                    cvSetReal2D(trainSamples, i, j, val);
                }
                cvSet1D(trainLabels, i, cvScalar((float)labels[perm[i]]));
                cvSet1D(sampleWeights, i, cvScalar(1));
            }
        }
    }
    else
    {
        FOR(i, sampleCnt)
        {
            // check whether the sample falls inside the rectangle generated by the learner
            const fvec sample = samples[perm[i]];
            FOR(j, learnerCount)
            {
                float val = 1;
                FOR(d, dim)
                {
                    if(sample[d] < learners[j][2*d] || sample[d] > learners[j][2*d]+learners[j][2*d+1])
                    {
                        val = 0;
                        break;
                    }
                }
                cvSetReal2D(trainSamples, i, j, val);
            }
            cvSet1D(trainLabels, i, cvScalar((float)labels[perm[i]]));
            cvSet1D(sampleWeights, i, cvScalar(1));
        }
    }
    delete [] perm;

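    // declare all feature columns as numerical and the response (last entry) as categorical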
    CvMat *varType = cvCreateMat(trainSamples->width+1, 1, CV_8UC1);
    FOR(i, trainSamples->width)
    {
        CV_MAT_ELEM(*varType, u8, i, 0) = CV_VAR_NUMERICAL;
    }
    CV_MAT_ELEM(*varType, u8, trainSamples->width, 0) = CV_VAR_CATEGORICAL;

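    // gentle AdaBoost over decision stumps (trees limited to a single split)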
    int maxSplit = 1;
    CvBoostParams params(CvBoost::GENTLE, weakCount, 0.95, maxSplit, false, NULL);
    params.split_criteria = CvBoost::DEFAULT;
    model = new CvBoost();
    model->train(trainSamples, CV_ROW_SAMPLE, trainLabels, NULL, NULL, varType, NULL, params);

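    // rescale the raw boosting score so that the largest magnitude over the training set maps to 5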
    scoreMultiplier = 1.f;
    float maxScore = -FLT_MAX, minScore = FLT_MAX;
    FOR(i, samples.size())
    {
        float score = Test(samples[i]);
        if(score > maxScore) maxScore = score;
        if(score < minScore) minScore = score;
    }
    if(minScore != maxScore)
    {
        scoreMultiplier = 1.f/(max(abs((double)maxScore),abs((double)minScore)))*5.f;
    }

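    // remember which learner (input column) each weak predictor splits on,
    // so that Test() only needs to evaluate those learners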
    CvSeq *predictors = model->get_weak_predictors();
    int length = cvSliceLength(CV_WHOLE_SEQ, predictors);
    features.clear();
    FOR(i, length)
    {
        CvBoostTree *predictor = *CV_SEQ_ELEM(predictors, CvBoostTree*, i);
        CvDTreeSplit *split = predictor->get_root()->split;
        features.push_back(split->var_idx);
    }

    cvReleaseMat(&trainSamples);
    cvReleaseMat(&trainLabels);
    cvReleaseMat(&sampleWeights);
    cvReleaseMat(&varType);
    trainSamples = 0;
    trainLabels = 0;
    sampleWeights = 0;
    varType = 0;
    bFixedThreshold = false;
    classSpan = 0.01f;
}

float ClassifierBoost::Test( const fvec &sample )
{
    if(!model) return 0;
    if(!learners.size()) return 0;

    CvMat *x = cvCreateMat(1, learners.size(), CV_32FC1);
    if(weakType != 1)
    {
        if(dim == 2)
        {
            FOR(i, features.size())
            {
                float val = 0;
                if(!weakType)
                {
                    val = sample[0]*learners[features[i]][0] + sample[1]*learners[features[i]][1];
                }
                else
                {
                    val = sqrtf((sample[0] - learners[features[i]][0])*(sample[0] - learners[features[i]][0])+
                        (sample[1] - learners[features[i]][1])*(sample[1] - learners[features[i]][1]));
                }
                cvSetReal2D(x, 0, features[i], val);
            }
        }
        else
        {
            FOR(i, features.size())
            {
                float val = 0;
                if(!weakType) // random projection: dot product with the learner direction
                {
                    FOR(d, dim) val += sample[d]*learners[features[i]][d];
                }
                else // random circle: distance to the learner center
                {
                    FOR(d, dim) val += (sample[d] - learners[features[i]][d])*(sample[d] - learners[features[i]][d]);
                    val = sqrtf(val);
                }
                cvSetReal2D(x, 0, features[i], val);
            }
        }
    }
    else
    {
        FOR(i, features.size())
        {
            int val = 1;
            FOR(d, dim)
            {
                if(sample[d] < learners[features[i]][2*d] ||
                    sample[d] > learners[features[i]][2*d]+learners[features[i]][2*d+1])
                {
                    val = 0;
                    break;
                }
            }
            cvSetReal2D(x, 0, features[i], val);
        }
    }

    // allocate memory for weak learner output
    int length = cvSliceLength(CV_WHOLE_SEQ, model->get_weak_predictors());
    CvMat *weakResponses = cvCreateMat(length, 1, CV_32FC1);
    float y = model->predict(x, NULL, weakResponses, CV_WHOLE_SEQ);
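    // the continuous score is the (rescaled) sum of the weak responses;
    // predict() itself returns the hard class label, which is not used here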
    double score = cvSum(weakResponses).val[0] * scoreMultiplier;

    cvReleaseMat(&weakResponses);
    cvReleaseMat(&x);
    return score;
}

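// weakType selects the weak learner family: 0 = random projections, 1 = random rectangles, 2 = random circles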
void ClassifierBoost::SetParams( u32 weakCount, int weakType )
{
    this->weakCount = weakCount;
    this->weakType = weakType;
}

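// returns a newly allocated description string; the caller takes ownership of the buffer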
char *ClassifierBoost::GetInfoString()
{
    char *text = new char[255];
    sprintf(text, "Boosting\n");
    sprintf(text+strlen(text), "Learners Count: %d\n", weakCount);
    sprintf(text+strlen(text), "Learners Type: ");
    switch(weakType)
    {
    case 0:
        sprintf(text+strlen(text), "Random Projections\n");
        break;
    case 1:
        sprintf(text+strlen(text), "Random Rectangles\n");
        break;
    case 2:
        sprintf(text+strlen(text), "Random Circles\n");
        break;
    }
    return text;
}