The first one
[mldemos:kalians-mldemos.git] / _AlgorithmsPlugins / KernelMethods / dlib / image_keypoint / hessian_pyramid.h
1 // Copyright (C) 2009  Davis E. King (davis@dlib.net)\r
2 // License: Boost Software License   See LICENSE.txt for the full license.\r
3 #ifndef DLIB_HESSIAN_PYRAMId_H__\r
4 #define DLIB_HESSIAN_PYRAMId_H__\r
5 \r
6 #include "hessian_pyramid_abstract.h"\r
7 #include "../algs.h"\r
8 #include "../image_transforms/integral_image.h"\r
9 #include "../array.h"\r
10 #include "../array2d.h"\r
11 #include "../noncopyable.h"\r
12 #include "../matrix.h"\r
13 #include "../stl_checked.h"\r
14 #include <algorithm>\r
15 #include <vector>\r
16 \r
17 namespace dlib\r
18 {\r
19 \r
20 // ----------------------------------------------------------------------------------------\r
21 \r
22     struct interest_point\r
23     {\r
24         interest_point() : scale(0), score(0), laplacian(0) {}\r
25 \r
26         dlib::vector<double,2> center;\r
27         double scale;\r
28         double score;\r
29         double laplacian;\r
30 \r
31         bool operator < (const interest_point& p) const { return score < p.score; }\r
32     };\r
33 \r
34 // ----------------------------------------------------------------------------------------\r
35 \r
36     inline void serialize(\r
37         const interest_point& item,  \r
38         std::ostream& out\r
39     )\r
40     {\r
41         try\r
42         {\r
43             serialize(item.center,out);\r
44             serialize(item.scale,out);\r
45             serialize(item.score,out);\r
46             serialize(item.laplacian,out);\r
47         }\r
48         catch (serialization_error& e)\r
49         { \r
50             throw serialization_error(e.info + "\n   while serializing object of type interest_point"); \r
51         }\r
52     }\r
53 \r
54 // ----------------------------------------------------------------------------------------\r
55 \r
56     inline void deserialize(\r
57         interest_point& item,  \r
58         std::istream& in \r
59     )\r
60     {\r
61         try\r
62         {\r
63             deserialize(item.center,in);\r
64             deserialize(item.scale,in);\r
65             deserialize(item.score,in);\r
66             deserialize(item.laplacian,in);\r
67         }\r
68         catch (serialization_error& e)\r
69         { \r
70             throw serialization_error(e.info + "\n   while deserializing object of type interest_point"); \r
71         }\r
72     }\r
73 \r
74 // ----------------------------------------------------------------------------------------\r
75 \r
76     class hessian_pyramid : noncopyable\r
77     {\r
78     public:\r
79         hessian_pyramid()\r
80         {\r
81             num_octaves = 0;\r
82             num_intervals = 0;\r
83             initial_step_size = 0;\r
84         }\r
85 \r
86         template <typename integral_image_type>\r
87         void build_pyramid (\r
88             const integral_image_type& img,\r
89             long num_octaves,\r
90             long num_intervals,\r
91             long initial_step_size\r
92         )\r
93         {\r
94             DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0,\r
95                 "\tvoid build_pyramid()"\r
96                 << "\n\tAll arguments to this function must be > 0"\r
97                 << "\n\t this:              " << this\r
98                 << "\n\t num_octaves:       " << num_octaves \r
99                 << "\n\t num_intervals:     " << num_intervals \r
100                 << "\n\t initial_step_size: " << initial_step_size \r
101             );\r
102 \r
103             this->num_octaves = num_octaves;\r
104             this->num_intervals = num_intervals;\r
105             this->initial_step_size = initial_step_size;\r
106 \r
107             // allocate space for the pyramid\r
108             pyramid.resize(num_octaves*num_intervals);\r
109             for (long o = 0; o < num_octaves; ++o)\r
110             {\r
111                 const long step_size = get_step_size(o);\r
112                 for (long i = 0; i < num_intervals; ++i)\r
113                 {\r
114                     pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size);\r
115                 }\r
116             }\r
117 \r
118             // now fill out the pyramid with data\r
119             for (long o = 0; o < num_octaves; ++o)\r
120             {\r
121                 const long step_size = get_step_size(o);\r
122                 const long border_size = get_border_size(o)*step_size;\r
123 \r
124                 for (long i = 0; i < num_intervals; ++i)\r
125                 {\r
126                     const long lobe_size = static_cast<long>(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1;\r
127                     const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0);\r
128 \r
129                     const long lobe_offset = lobe_size/2+1;\r
130                     const point tl(-lobe_offset,-lobe_offset);\r
131                     const point tr(lobe_offset,-lobe_offset);\r
132                     const point bl(-lobe_offset,lobe_offset);\r
133                     const point br(lobe_offset,lobe_offset);\r
134 \r
135                     for (long r = border_size; r < img.nr() - border_size; r += step_size)\r
136                     {\r
137                         for (long c = border_size; c < img.nc() - border_size; c += step_size)\r
138                         {\r
139                             const point p(c,r);\r
140 \r
141                             double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) - \r
142                                          img.get_sum_of_area(centered_rect(p, lobe_size,   2*lobe_size-1))*3.0;\r
143 \r
144                             double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) - \r
145                                          img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0;\r
146 \r
147                             double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) + \r
148                                          img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) -\r
149                                          img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) -\r
150                                          img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size));\r
151 \r
152                             // now we normalize the filter responses\r
153                             Dxx *= area_inv;\r
154                             Dyy *= area_inv;\r
155                             Dxy *= area_inv;\r
156 \r
157 \r
158                             double sign_of_laplacian = +1;\r
159                             if (Dxx + Dyy < 0)\r
160                                 sign_of_laplacian = -1;\r
161 \r
162                             double determinant = Dxx*Dyy - 0.81*Dxy*Dxy;\r
163 \r
164                             // If the determinant is negative then just blank it out by setting\r
165                             // it to zero.\r
166                             if (determinant < 0)\r
167                                 determinant = 0;\r
168 \r
169                             // Save the determinant of the Hessian into our image pyramid.  Also\r
170                             // pack the laplacian sign into the value so we can get it out later.\r
171                             pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant;\r
172 \r
173                         }\r
174                     }\r
175 \r
176                 }\r
177             }\r
178         }\r
179 \r
180         long get_border_size (\r
181             long octave\r
182         ) const\r
183         {\r
184             DLIB_ASSERT(0 <= octave && octave < octaves(),\r
185                 "\tlong get_border_size(octave)"\r
186                 << "\n\tInvalid octave value"\r
187                 << "\n\t this:   " << this\r
188                 << "\n\t octave: " << octave \r
189             );\r
190 \r
191             const double lobe_size = std::pow(2.0, octave+1.0)*(num_intervals+1) + 1;\r
192             const double filter_size = 3*lobe_size;\r
193 \r
194             const long bs = static_cast<long>(std::ceil(filter_size/2.0));\r
195             return bs;\r
196         }\r
197 \r
198         long get_step_size (\r
199             long octave\r
200         ) const\r
201         {\r
202             DLIB_ASSERT(0 <= octave && octave < octaves(),\r
203                 "\tlong get_step_size(octave)"\r
204                 << "\n\tInvalid octave value"\r
205                 << "\n\t this:   " << this\r
206                 << "\n\t octave: " << octave \r
207             );\r
208 \r
209             return initial_step_size*static_cast<long>(std::pow(2.0, (double)octave)+0.5);\r
210         }\r
211 \r
212         long nr (\r
213             long octave\r
214         ) const\r
215         {\r
216             DLIB_ASSERT(0 <= octave && octave < octaves(),\r
217                 "\tlong nr(octave)"\r
218                 << "\n\tInvalid octave value"\r
219                 << "\n\t this:   " << this\r
220                 << "\n\t octave: " << octave \r
221             );\r
222 \r
223             return pyramid[num_intervals*octave].nr();\r
224         }\r
225 \r
226         long nc (\r
227             long octave\r
228         ) const\r
229         {\r
230             DLIB_ASSERT(0 <= octave && octave < octaves(),\r
231                 "\tlong nc(octave)"\r
232                 << "\n\tInvalid octave value"\r
233                 << "\n\t this:   " << this\r
234                 << "\n\t octave: " << octave \r
235             );\r
236 \r
237             return pyramid[num_intervals*octave].nc();\r
238         }\r
239 \r
240         double get_value (\r
241             long octave,\r
242             long interval,\r
243             long r,\r
244             long c\r
245         ) const\r
246         {\r
247             DLIB_ASSERT(0 <= octave && octave < octaves() &&\r
248                         0 <= interval && interval < intervals() &&\r
249                         get_border_size(octave) <= r && r < nr(octave)-get_border_size(octave) &&\r
250                         get_border_size(octave) <= c && c < nc(octave)-get_border_size(octave),\r
251                 "\tdouble get_value(octave, interval, r, c)"\r
252                 << "\n\tInvalid inputs to this function"\r
253                 << "\n\t this:      " << this\r
254                 << "\n\t octave:    " << octave \r
255                 << "\n\t interval:  " << interval \r
256                 << "\n\t octaves:   " << octaves() \r
257                 << "\n\t intervals: " << intervals()\r
258                 << "\n\t r:         " << r  \r
259                 << "\n\t c:         " << c \r
260                 << "\n\t nr(octave): " << nr(octave)  \r
261                 << "\n\t nc(octave): " << nc(octave) \r
262                 << "\n\t get_border_size(octave): " << get_border_size(octave) \r
263             );\r
264 \r
265             return std::abs(pyramid[num_intervals*octave + interval][r][c]);\r
266         }\r
267 \r
268         double get_laplacian (\r
269             long octave,\r
270             long interval,\r
271             long r,\r
272             long c\r
273         ) const\r
274         {\r
275             DLIB_ASSERT(0 <= octave && octave < octaves() &&\r
276                         0 <= interval && interval < intervals() &&\r
277                         get_border_size(octave) <= r && r < nr(octave)-get_border_size(octave) &&\r
278                         get_border_size(octave) <= c && c < nc(octave)-get_border_size(octave),\r
279                 "\tdouble get_laplacian(octave, interval, r, c)"\r
280                 << "\n\tInvalid inputs to this function"\r
281                 << "\n\t this:      " << this\r
282                 << "\n\t octave:    " << octave \r
283                 << "\n\t interval:  " << interval \r
284                 << "\n\t octaves:   " << octaves() \r
285                 << "\n\t intervals: " << intervals()\r
286                 << "\n\t r:         " << r  \r
287                 << "\n\t c:         " << c \r
288                 << "\n\t nr(octave): " << nr(octave)  \r
289                 << "\n\t nc(octave): " << nc(octave) \r
290                 << "\n\t get_border_size(octave): " << get_border_size(octave) \r
291             );\r
292 \r
293             // return the sign of the laplacian\r
294             if (pyramid[num_intervals*octave + interval][r][c] > 0)\r
295                 return +1;\r
296             else\r
297                 return -1;\r
298         }\r
299 \r
300         long octaves (\r
301         ) const { return num_octaves; }\r
302 \r
303         long intervals (\r
304         ) const { return num_intervals; }\r
305 \r
306     private:\r
307 \r
308         long num_octaves;\r
309         long num_intervals;\r
310         long initial_step_size;\r
311 \r
312         typedef array2d<double>::kernel_1a image_type;\r
313         typedef array<image_type>::expand_1d pyramid_type;\r
314 \r
315         pyramid_type pyramid;\r
316     };\r
317 \r
318 // ----------------------------------------------------------------------------------------\r
319 // ----------------------------------------------------------------------------------------\r
320 // ----------------------------------------------------------------------------------------\r
321 \r
322     namespace hessian_pyramid_helpers\r
323     {\r
324         inline bool is_maximum_in_region(\r
325             const hessian_pyramid& pyr,\r
326             long o, \r
327             long i, \r
328             long r, \r
329             long c\r
330         )\r
331         {\r
332             // First check if this point is near the edge of the octave \r
333             // If it is then we say it isn't a maximum as these points are\r
334             // not as reliable.\r
335             if (i <= 0 || i+1 >= pyr.intervals())\r
336             {\r
337                 return false;\r
338             }\r
339 \r
340             const double val = pyr.get_value(o,i,r,c);\r
341 \r
342             // now check if there are any bigger values around this guy\r
343             for (long ii = i-1; ii <= i+1; ++ii)\r
344             {\r
345                 for (long rr = r-1; rr <= r+1; ++rr)\r
346                 {\r
347                     for (long cc = c-1; cc <= c+1; ++cc)\r
348                     {\r
349                         if (pyr.get_value(o,ii,rr,cc) > val)\r
350                             return false;\r
351                     }\r
352                 }\r
353             }\r
354 \r
355             return true;\r
356         }\r
357 \r
358     // ------------------------------------------------------------------------------------\r
359 \r
360         inline const matrix<double,3,1> get_hessian_gradient (\r
361             const hessian_pyramid& pyr,\r
362             long o, \r
363             long i, \r
364             long r, \r
365             long c\r
366         )\r
367         {\r
368             matrix<double,3,1> grad;\r
369             grad(0) = (pyr.get_value(o,i,r,c+1) - pyr.get_value(o,i,r,c-1))/2.0;\r
370             grad(1) = (pyr.get_value(o,i,r+1,c) - pyr.get_value(o,i,r-1,c))/2.0;\r
371             grad(2) = (pyr.get_value(o,i+1,r,c) - pyr.get_value(o,i-1,r,c))/2.0;\r
372             return grad;\r
373         }\r
374 \r
375     // ------------------------------------------------------------------------------------\r
376 \r
377         inline const matrix<double,3,3> get_hessian_hessian (\r
378             const hessian_pyramid& pyr,\r
379             long o, \r
380             long i, \r
381             long r, \r
382             long c\r
383         )\r
384         {\r
385             matrix<double,3,3> hess;\r
386             const double val = pyr.get_value(o,i,r,c);\r
387 \r
388             double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*val;\r
389             double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*val;\r
390             double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*val;\r
391 \r
392             double Dxy = (pyr.get_value(o,i,r+1,c+1) + pyr.get_value(o,i,r-1,c-1) -\r
393                           pyr.get_value(o,i,r-1,c+1) - pyr.get_value(o,i,r+1,c-1)) / 4.0;\r
394 \r
395             double Dxs = (pyr.get_value(o,i+1,r,c+1) + pyr.get_value(o,i-1,r,c-1) -\r
396                           pyr.get_value(o,i-1,r,c+1) - pyr.get_value(o,i+1,r,c-1)) / 4.0;\r
397 \r
398             double Dys = (pyr.get_value(o,i+1,r+1,c) + pyr.get_value(o,i-1,r-1,c) -\r
399                           pyr.get_value(o,i-1,r+1,c) - pyr.get_value(o,i+1,r-1,c)) / 4.0;\r
400 \r
401 \r
402             hess = Dxx, Dxy, Dxs,\r
403             Dxy, Dyy, Dys,\r
404             Dxs, Dys, Dss;\r
405 \r
406             return hess;\r
407         }\r
408 \r
409     // ------------------------------------------------------------------------------------\r
410 \r
411         inline const interest_point interpolate_point (\r
412             const hessian_pyramid& pyr, \r
413             long o, \r
414             long i, \r
415             long r, \r
416             long c\r
417         )\r
418         {\r
419             dlib::vector<double,2> p(c,r);\r
420 \r
421             dlib::vector<double,3> start_point(c,r,i);\r
422             dlib::vector<double,3> interpolated_point = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c);\r
423 \r
424             //cout << "inter: " <<  trans(interpolated_point);\r
425 \r
426             interest_point temp;\r
427             if (max(abs(interpolated_point)) < 0.5)\r
428             {\r
429                 p = (start_point+interpolated_point)*pyr.get_step_size(o);\r
430                 const double lobe_size = std::pow(2.0, o+1.0)*(i+interpolated_point.z()+1) + 1;\r
431                 const double filter_size = 3*lobe_size;\r
432                 const double scale = 1.2/9.0 * filter_size;\r
433 \r
434                 temp.center = p;\r
435                 temp.scale = scale;\r
436                 temp.score = pyr.get_value(o,i,r,c);\r
437                 temp.laplacian = pyr.get_laplacian(o,i,r,c);\r
438             }\r
439 \r
440             return temp;\r
441         }\r
442 \r
443     }\r
444 \r
445 // ----------------------------------------------------------------------------------------\r
446 \r
447     template <typename Alloc>\r
448     void get_interest_points (\r
449         const hessian_pyramid& pyr,\r
450         double threshold,\r
451         std::vector<interest_point,Alloc>& result_points\r
452     )\r
453     {\r
454         DLIB_ASSERT(threshold >= 0,\r
455             "\tvoid get_interest_points()"\r
456             << "\n\t Invalid arguments to this function"\r
457             << "\n\t threshold: " << threshold \r
458         );\r
459         using namespace std;\r
460         using namespace hessian_pyramid_helpers;\r
461 \r
462         result_points.clear();\r
463 \r
464         for (long o = 0; o < pyr.octaves(); ++o)\r
465         {\r
466             const long border_size = pyr.get_border_size(o);\r
467             const long nr = pyr.nr(o);\r
468             const long nc = pyr.nc(o);\r
469 \r
470             // do non-maximum suppression on all the intervals in the current octave and \r
471             // accumulate the results in result_points\r
472             for (long i = 1; i < pyr.intervals()-1;  i += 3)\r
473             {\r
474                 for (long r = border_size+1; r < nr - border_size-1; r += 3)\r
475                 {\r
476                     for (long c = border_size+1; c < nc - border_size-1; c += 3)\r
477                     {\r
478                         double max_val = pyr.get_value(o,i,r,c);\r
479                         long max_i = i;\r
480                         long max_r = r;\r
481                         long max_c = c;\r
482 \r
483                         // loop over this 3x3x3 block and find the largest element\r
484                         for (long ii = i; ii < std::min(i + 3, pyr.intervals()-1); ++ii)\r
485                         {\r
486                             for (long rr = r; rr < std::min(r + 3, nr - border_size - 1); ++rr)\r
487                             {\r
488                                 for (long cc = c; cc < std::min(c + 3, nc - border_size - 1); ++cc)\r
489                                 {\r
490                                     double temp = pyr.get_value(o,ii,rr,cc);\r
491                                     if (temp > max_val)\r
492                                     {\r
493                                         max_val = temp;\r
494                                         max_i = ii;\r
495                                         max_r = rr;\r
496                                         max_c = cc;\r
497                                     }\r
498                                 }\r
499                             }\r
500                         }\r
501 \r
502                         // If the max point we found is really a maximum in its own region and\r
503                         // is big enough then add it to the results.\r
504                         if (max_val > threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c))\r
505                         {\r
506                             //cout << max_val << endl;\r
507                             interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c);\r
508                             if (sp.score > threshold)\r
509                             {\r
510                                 result_points.push_back(sp);\r
511                             }\r
512                         }\r
513 \r
514                     }\r
515                 }\r
516             }\r
517         }\r
518 \r
519     }\r
520 \r
521 // ----------------------------------------------------------------------------------------\r
522 \r
523     template <typename Alloc>\r
524     void get_interest_points (\r
525         const hessian_pyramid& pyr,\r
526         double threshold,\r
527         std_vector_c<interest_point,Alloc>& result_points\r
528     )\r
529     /*!\r
530         This function is just an overload that automatically casts std_vector_c objects\r
531         into std::vector objects.  (Usually this is automatic but the template argument\r
532         there messes up the conversion so we have to do it explicitly)\r
533     !*/\r
534     {\r
535         std::vector<interest_point,Alloc>& v = result_points;\r
536         get_interest_points(pyr, threshold, v);\r
537     }\r
538 \r
539 // ----------------------------------------------------------------------------------------\r
540 \r
541 }\r
542 \r
543 #endif  // DLIB_HESSIAN_PYRAMID_H__\r
544 \r