omxComputeEstimateHessian reorg 8/11
[openmx:openmx.git] / src / omxHessianCalculation.cpp
1 /*
2  *  Copyright 2007-2013 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *   Unless required by applicable law or agreed to in writing, software
11  *   distributed under the License is distributed on an "AS IS" BASIS,
12  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 /**
18  * Based on:
19  *
20  * Paul Gilbert and Ravi Varadhan (2012). numDeriv: Accurate Numerical Derivatives. R package
21  * version 2012.9-1. http://CRAN.R-project.org/package=numDeriv
22  *
23  **/
24
25 #include <stdio.h>
26 #include <sys/types.h>
27 #include <errno.h>
28
29 #include <R.h>
30 #include <Rinternals.h>
31 #include <Rdefines.h>
32 #include <R_ext/Rdynload.h>
33 #include <R_ext/BLAS.h>
34 #include <R_ext/Lapack.h>
35
36 #include "omxDefines.h"
37 #include "npsolWrap.h"
38 #include "omxState.h"
39 #include "omxMatrix.h"
40 #include "omxAlgebra.h"
41 #include "omxFitFunction.h"
42 #include "omxNPSOLSpecific.h"
43 #include "omxOptimizer.h"
44 #include "omxOpenmpWrap.h"
45 #include "omxHessianCalculation.h"
46 #include "omxGlobalState.h"
47 #include "omxExportBackendState.h"
48
49 struct hess_struct {
50         int     numParams;
51         double* freeParams;
52         double* Haprox;
53         double* Gaprox;
54         omxMatrix* fitMatrix;
55         double  f0;
56         double  functionPrecision;
57         int r;
58 };
59
60 static void omxPopulateHessianWork(struct hess_struct *hess_work, 
61         double functionPrecision, int r, omxState* state,
62         omxState *parentState) {
63
64         omxFitFunction* oo = state->fitMatrix->fitFunction;
65         int numParams = state->numFreeParams;
66
67         double* optima = parentState->optimalValues;
68         double *freeParams = (double*) Calloc(numParams, double);
69
70         hess_work->numParams = numParams;
71         hess_work->functionPrecision = functionPrecision;
72         hess_work->r = r;
73         hess_work->Haprox = (double*) Calloc(r, double);                // Hessian Workspace
74         hess_work->Gaprox = (double*) Calloc(r, double);                // Gradient Workspace
75         hess_work->freeParams = freeParams;
76         for(int i = 0; i < numParams; i++) {
77                 freeParams[i] = optima[i];
78         }
79
80         omxMatrix *fitMatrix = oo->matrix;
81         hess_work->fitMatrix = fitMatrix;
82
83         handleFreeVarListHelper(state, freeParams, numParams);
84
85         omxRecompute(fitMatrix);                // Initial recompute in case it matters.        
86         hess_work->f0 = omxMatrixElement(fitMatrix, 0, 0);
87 }
88
89 /**
90   @params i              parameter number
91   @params hess_work      local copy
92   @params optima         shared read-only variable
93   @params gradient       shared write-only variable
94   @params hessian        shared write-only variable
95  */
96 static void omxEstimateHessianOnDiagonal(int i, struct hess_struct* hess_work, 
97         double *optima, double *gradient, double *hessian) {
98
99         static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
100         static const double eps = 1E-4; // Kept here for access purposes.
101
102         int     numParams          = hess_work->numParams;         // read-only
103         double *Haprox             = hess_work->Haprox;
104         double *Gaprox             = hess_work->Gaprox;
105         double *freeParams         = hess_work->freeParams;
106         omxMatrix* fitMatrix = hess_work->fitMatrix; 
107         double functionPrecision   = hess_work->functionPrecision; // read-only
108         double f0                  = hess_work->f0;                // read-only
109         int    r                   = hess_work->r;                 // read-only
110
111
112         /* Part the first: Gradient and diagonal */
113         double iOffset = fabs(functionPrecision * optima[i]);
114         if(fabs(iOffset) < eps) iOffset += eps;
115         if(OMX_DEBUG) {Rprintf("Hessian estimation: iOffset: %f.\n", iOffset);}
116         for(int k = 0; k < r; k++) {                    // Decreasing step size, starting at k == 0
117                 if(OMX_DEBUG) {Rprintf("Hessian estimation: Parameter %d at refinement level %d (%f). One Step Forward.\n", i, k, iOffset);}
118                 freeParams[i] = optima[i] + iOffset;
119
120                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
121
122                 omxRecompute(fitMatrix);
123                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
124
125                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
126
127                 freeParams[i] = optima[i] - iOffset;
128
129                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
130
131                 omxRecompute(fitMatrix);
132                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
133
134                 Gaprox[k] = (f1 - f2) / (2.0*iOffset);                                          // This is for the gradient
135                 Haprox[k] = (f1 - 2.0 * f0 + f2) / (iOffset * iOffset);         // This is second derivative
136                 freeParams[i] = optima[i];                                                                      // Reset parameter value
137                 iOffset /= v;
138                 if(OMX_DEBUG) {Rprintf("Hessian estimation: (%d, %d)--Calculating F1: %f F2: %f, Haprox: %f...\n", i, i, f1, f2, Haprox[k]);}
139         }
140
141         for(int m = 1; m < r; m++) {                                            // Richardson Step
142                 for(int k = 0; k < (r - m); k++) {
143                         Gaprox[k] = (Gaprox[k+1] * pow(4.0, m) - Gaprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
144                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
145                 }
146         }
147
148         if(OMX_DEBUG) { Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d) with value %f...\n", hessian, i, i, i*numParams+i, Haprox[0]); }
149         gradient[i] = Gaprox[0];                                                // NPSOL reports a gradient that's fine.  Why report two?
150         hessian[i*numParams + i] = Haprox[0];
151
152         if(OMX_DEBUG) {Rprintf("Done with parameter %d.\n", i);}
153
154 }
155
156 static void omxEstimateHessianOffDiagonal(int i, int l, struct hess_struct* hess_work, 
157         double *optima, double *gradient, double *hessian) {
158
159     static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
160     static const double eps = 1E-4; // Kept here for access purposes.
161
162         int     numParams          = hess_work->numParams;         // read-only
163         double *Haprox             = hess_work->Haprox;
164         double *freeParams         = hess_work->freeParams;
165         omxMatrix* fitMatrix = hess_work->fitMatrix; 
166         double functionPrecision   = hess_work->functionPrecision; // read-only
167         double f0                  = hess_work->f0;                // read-only
168         int    r                   = hess_work->r;                 // read-only
169
170         double iOffset = fabs(functionPrecision*optima[i]);
171         if(fabs(iOffset) < eps) iOffset += eps;
172         double lOffset = fabs(functionPrecision*optima[l]);
173         if(fabs(lOffset) < eps) lOffset += eps;
174
175         for(int k = 0; k < r; k++) {
176                 freeParams[i] = optima[i] + iOffset;
177                 freeParams[l] = optima[l] + lOffset;
178
179                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
180
181                 omxRecompute(fitMatrix);
182                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
183
184                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
185
186                 freeParams[i] = optima[i] - iOffset;
187                 freeParams[l] = optima[l] - lOffset;
188
189                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
190
191                 omxRecompute(fitMatrix);
192                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
193
194                 Haprox[k] = (f1 - 2.0 * f0 + f2 - hessian[i*numParams+i]*iOffset*iOffset -
195                                                 hessian[l*numParams+l]*lOffset*lOffset)/(2.0*iOffset*lOffset);
196                 if(OMX_DEBUG) {Rprintf("Hessian first off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f).\n", Haprox[k], iOffset, lOffset, f1, hessian[i*numParams+i], hessian[l*numParams+l], v, k, pow(v, k), functionPrecision*optima[i], functionPrecision*optima[l]);}
197
198                 freeParams[i] = optima[i];                              // Reset parameter values
199                 freeParams[l] = optima[l];
200
201                 iOffset = iOffset / v;                                  //  And shrink step
202                 lOffset = lOffset / v;
203         }
204
205         for(int m = 1; m < r; m++) {                                            // Richardson Step
206                 for(int k = 0; k < (r - m); k++) {
207                         if(OMX_DEBUG) {Rprintf("Hessian off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f, and %f).\n", Haprox[k], iOffset, lOffset, functionPrecision, optima[i], optima[l], v, m, pow(4.0, m), functionPrecision*optima[i], functionPrecision*optima[l], k);}
208                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k]) / (pow(4.0, m)-1);
209                 }
210         }
211
212         if(OMX_DEBUG) {Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d and %d) with value %f...", hessian, i, l, i*numParams+l, l*numParams+i, Haprox[0]);}
213         hessian[i*numParams+l] = Haprox[0];
214         hessian[l*numParams+i] = Haprox[0];
215
216 }
217
218 void omxComputeEstimateHessian::doHessianCalculation(int numParams, int numChildren, 
219         struct hess_struct *hess_work)
220 {
221         int i,j;
222
223         double* parent_optima = globalState->optimalValues;
224
225         int numOffDiagonal = (numParams * (numParams - 1)) / 2;
226         int *diags = Calloc(numOffDiagonal, int);
227         int *offDiags = Calloc(numOffDiagonal, int);
228         int offset = 0;
229         // gcc does not detect the usage of the following variable
230         // in the omp parallel pragma, and marks the variable as
231         // unused, so the attribute is placed to silence the warning.
232     int __attribute__((unused)) parallelism = (numChildren == 0) ? 1 : numChildren;
233
234         // There must be a way to avoid constructing the
235         // diags and offDiags arrays and replace them with functions
236         // that produce these values given the input
237         /// {0, 1, ..., numOffDiagonal - 1} -- M. Spiegel
238         for(i = 0; i < numParams; i++) {
239                 for(j = i - 1; j >= 0; j--) {
240                         diags[offset] = i;
241                         offDiags[offset] = j;
242                         offset++;
243                 }
244         }
245
246         #pragma omp parallel for num_threads(parallelism) 
247         for(i = 0; i < numParams; i++) {
248                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
249                 omxEstimateHessianOnDiagonal(i, hess_work + threadId, 
250                 parent_optima, gradient, hessian);
251         }
252
253         #pragma omp parallel for num_threads(parallelism) 
254         for(offset = 0; offset < numOffDiagonal; offset++) {
255                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
256                 omxEstimateHessianOffDiagonal(diags[offset], offDiags[offset],
257                         hess_work + threadId, parent_optima, gradient, hessian);
258         }
259
260         Free(diags);
261         Free(offDiags);
262 }
263
264 omxComputeEstimateHessian::omxComputeEstimateHessian() :
265         stepSize(.0001),
266         numIter(4),
267         stdErrors(NULL)
268 {
269 }
270
271 omxComputeEstimateHessian::~omxComputeEstimateHessian()
272 {
273 }
274
275 class omxCompute *newComputeEstimateHessian()
276 {
277         return new omxComputeEstimateHessian;
278 }
279
280 void omxComputeEstimateHessian::compute()
281 {
282         int numParams = globalState->numFreeParams;
283
284         PROTECT(calculatedHessian = allocMatrix(REALSXP, numParams, numParams));
285
286         // TODO: Check for nonlinear constraints and adjust algorithm accordingly.
287         // TODO: Allow more than one hessian value for calculation
288
289         int numChildren = globalState->numChildren;
290
291         struct hess_struct* hess_work;
292         if (numChildren < 2) {
293                 hess_work = Calloc(1, struct hess_struct);
294                 omxPopulateHessianWork(hess_work, stepSize, numIter, globalState, globalState);
295         } else {
296                 hess_work = Calloc(numChildren, struct hess_struct);
297                 for(int i = 0; i < numChildren; i++) {
298                         omxPopulateHessianWork(hess_work + i, stepSize, numIter, globalState->childList[i], globalState);
299                 }
300         }
301         if(OMX_DEBUG) Rprintf("Hessian Calculation using %d children\n", numChildren);
302
303         this->hessian = (double*) R_alloc(numParams * numParams, sizeof(double));
304
305         this->gradient = (double*) R_alloc(numParams, sizeof(double));
306   
307         this->doHessianCalculation(numParams, numChildren, hess_work);
308
309         if(OMX_DEBUG) {Rprintf("Hessian Computation complete.\n");}
310
311         if (numChildren < 2) {
312                 Free(hess_work->Haprox);
313                 Free(hess_work->Gaprox);
314                 Free(hess_work->freeParams);
315             Free(hess_work);
316         } else {
317                 for(int i = 0; i < numChildren; i++) {
318                         Free((hess_work + i)->Haprox);
319                         Free((hess_work + i)->Gaprox);
320                         Free((hess_work + i)->freeParams);
321                 }
322                 Free(hess_work);
323         }
324
325         memcpy(REAL(calculatedHessian), hessian, sizeof(double) * numParams * numParams);
326
327         if (globalState->calculateStdErrors) {
328                 // This function calculates the standard errors from the hessian matrix
329                 // sqrt(diag(solve(hessian)))
330
331                 const double scale = 2;
332                 double* workspace = (double *) Calloc(numParams * numParams, double);
333         
334                 for(int i = 0; i < numParams; i++)
335                         for(int j = 0; j <= i; j++)
336                                 workspace[i*numParams+j] = hessian[i*numParams+j];              // Populate upper triangle
337         
338                 char u = 'U';
339                 int *ipiv = new int[numParams];
340                 int lwork = -1;
341                 double temp;
342                 int info = 0;
343         
344                 F77_CALL(dsytrf)(&u, &numParams, workspace, &numParams, ipiv, &temp, &lwork, &info);
345         
346                 lwork = (temp > numParams?temp:numParams);
347         
348                 double* work = (double*) Calloc(lwork, double);
349         
350                 F77_CALL(dsytrf)(&u, &numParams, workspace, &numParams, ipiv, work, &lwork, &info);
351         
352                 if(info != 0) {
353                         // report error TODO
354                 } else {
355                         F77_CALL(dsytri)(&u, &numParams, workspace, &numParams, ipiv, work, &info);
356         
357                         if(info != 0) {
358                                 // report error TODO
359                         } else {
360                                 PROTECT(stdErrors = allocMatrix(REALSXP, numParams, 1));
361                                 double* stdErr = REAL(stdErrors);
362                                 for(int i = 0; i < numParams; i++) {
363                                         stdErr[i] = sqrt(scale) * sqrt(workspace[i * numParams + i]);
364                                 }
365                         }
366                 }
367         
368                 delete ipiv;
369                 Free(workspace);
370                 Free(work);
371         }
372 }
373
374 void omxComputeEstimateHessian::reportResults(MxRList *result)
375 {
376         result->push_back(std::make_pair(mkChar("calculatedHessian"), calculatedHessian));
377
378         if (stdErrors) {
379                 result->push_back(std::make_pair(mkChar("standardErrors"), stdErrors));
380         }
381 }