omxComputeEstimateHessian reorg 2/11
[openmx:openmx.git] / src / omxHessianCalculation.cpp
1 /*
2  *  Copyright 2007-2013 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *   Unless required by applicable law or agreed to in writing, software
11  *   distributed under the License is distributed on an "AS IS" BASIS,
12  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 #include <stdio.h>
18 #include <sys/types.h>
19 #include <errno.h>
20
21 #include <R.h>
22 #include <Rinternals.h>
23 #include <Rdefines.h>
24 #include <R_ext/Rdynload.h>
25 #include <R_ext/BLAS.h>
26 #include <R_ext/Lapack.h>
27
28 #include "omxDefines.h"
29 #include "npsolWrap.h"
30 #include "omxState.h"
31 #include "omxMatrix.h"
32 #include "omxAlgebra.h"
33 #include "omxFitFunction.h"
34 #include "omxNPSOLSpecific.h"
35 #include "omxOptimizer.h"
36 #include "omxOpenmpWrap.h"
37 #include "omxHessianCalculation.h"
38 #include "omxGlobalState.h"
39 #include "omxExportBackendState.h"
40
41 struct hess_struct {
42         int     numParams;
43         double* freeParams;
44         double* Haprox;
45         double* Gaprox;
46         omxMatrix* fitMatrix;
47         double  f0;
48         double  functionPrecision;
49         int r;
50 };
51
52 void omxPopulateHessianWork(struct hess_struct *hess_work, 
53         double functionPrecision, int r, omxState* state,
54         omxState *parentState) {
55
56         omxFitFunction* oo = state->fitMatrix->fitFunction;
57         int numParams = state->numFreeParams;
58
59         double* optima = parentState->optimalValues;
60         double *freeParams = (double*) Calloc(numParams, double);
61
62         hess_work->numParams = numParams;
63         hess_work->functionPrecision = functionPrecision;
64         hess_work->r = r;
65         hess_work->Haprox = (double*) Calloc(r, double);                // Hessian Workspace
66         hess_work->Gaprox = (double*) Calloc(r, double);                // Gradient Workspace
67         hess_work->freeParams = freeParams;
68         for(int i = 0; i < numParams; i++) {
69                 freeParams[i] = optima[i];
70         }
71
72         omxMatrix *fitMatrix = oo->matrix;
73         hess_work->fitMatrix = fitMatrix;
74
75         handleFreeVarListHelper(state, freeParams, numParams);
76
77         omxRecompute(fitMatrix);                // Initial recompute in case it matters.        
78         hess_work->f0 = omxMatrixElement(fitMatrix, 0, 0);
79 }
80
81 /**
82   @params i              parameter number
83   @params hess_work      local copy
84   @params optima         shared read-only variable
85   @params gradient       shared write-only variable
86   @params hessian        shared write-only variable
87  */
88 void omxEstimateHessianOnDiagonal(int i, struct hess_struct* hess_work, 
89         double *optima, double *gradient, double *hessian) {
90
91         static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
92         static const double eps = 1E-4; // Kept here for access purposes.
93
94         int     numParams          = hess_work->numParams;         // read-only
95         double *Haprox             = hess_work->Haprox;
96         double *Gaprox             = hess_work->Gaprox;
97         double *freeParams         = hess_work->freeParams;
98         omxMatrix* fitMatrix = hess_work->fitMatrix; 
99         double functionPrecision   = hess_work->functionPrecision; // read-only
100         double f0                  = hess_work->f0;                // read-only
101         int    r                   = hess_work->r;                 // read-only
102
103
104         /* Part the first: Gradient and diagonal */
105         double iOffset = fabs(functionPrecision * optima[i]);
106         if(fabs(iOffset) < eps) iOffset += eps;
107         if(OMX_DEBUG) {Rprintf("Hessian estimation: iOffset: %f.\n", iOffset);}
108         for(int k = 0; k < r; k++) {                    // Decreasing step size, starting at k == 0
109                 if(OMX_DEBUG) {Rprintf("Hessian estimation: Parameter %d at refinement level %d (%f). One Step Forward.\n", i, k, iOffset);}
110                 freeParams[i] = optima[i] + iOffset;
111
112                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
113
114                 omxRecompute(fitMatrix);
115                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
116
117                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
118
119                 freeParams[i] = optima[i] - iOffset;
120
121                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
122
123                 omxRecompute(fitMatrix);
124                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
125
126                 Gaprox[k] = (f1 - f2) / (2.0*iOffset);                                          // This is for the gradient
127                 Haprox[k] = (f1 - 2.0 * f0 + f2) / (iOffset * iOffset);         // This is second derivative
128                 freeParams[i] = optima[i];                                                                      // Reset parameter value
129                 iOffset /= v;
130                 if(OMX_DEBUG) {Rprintf("Hessian estimation: (%d, %d)--Calculating F1: %f F2: %f, Haprox: %f...\n", i, i, f1, f2, Haprox[k]);}
131         }
132
133         for(int m = 1; m < r; m++) {                                            // Richardson Step
134                 for(int k = 0; k < (r - m); k++) {
135                         Gaprox[k] = (Gaprox[k+1] * pow(4.0, m) - Gaprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
136                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
137                 }
138         }
139
140         if(OMX_DEBUG) { Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d) with value %f...\n", hessian, i, i, i*numParams+i, Haprox[0]); }
141         gradient[i] = Gaprox[0];                                                // NPSOL reports a gradient that's fine.  Why report two?
142         hessian[i*numParams + i] = Haprox[0];
143
144         if(OMX_DEBUG) {Rprintf("Done with parameter %d.\n", i);}
145
146 }
147
148 void omxEstimateHessianOffDiagonal(int i, int l, struct hess_struct* hess_work, 
149         double *optima, double *gradient, double *hessian) {
150
151     static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
152     static const double eps = 1E-4; // Kept here for access purposes.
153
154         int     numParams          = hess_work->numParams;         // read-only
155         double *Haprox             = hess_work->Haprox;
156         double *freeParams         = hess_work->freeParams;
157         omxMatrix* fitMatrix = hess_work->fitMatrix; 
158         double functionPrecision   = hess_work->functionPrecision; // read-only
159         double f0                  = hess_work->f0;                // read-only
160         int    r                   = hess_work->r;                 // read-only
161
162         double iOffset = fabs(functionPrecision*optima[i]);
163         if(fabs(iOffset) < eps) iOffset += eps;
164         double lOffset = fabs(functionPrecision*optima[l]);
165         if(fabs(lOffset) < eps) lOffset += eps;
166
167         for(int k = 0; k < r; k++) {
168                 freeParams[i] = optima[i] + iOffset;
169                 freeParams[l] = optima[l] + lOffset;
170
171                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
172
173                 omxRecompute(fitMatrix);
174                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
175
176                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
177
178                 freeParams[i] = optima[i] - iOffset;
179                 freeParams[l] = optima[l] - lOffset;
180
181                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
182
183                 omxRecompute(fitMatrix);
184                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
185
186                 Haprox[k] = (f1 - 2.0 * f0 + f2 - hessian[i*numParams+i]*iOffset*iOffset -
187                                                 hessian[l*numParams+l]*lOffset*lOffset)/(2.0*iOffset*lOffset);
188                 if(OMX_DEBUG) {Rprintf("Hessian first off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f).\n", Haprox[k], iOffset, lOffset, f1, hessian[i*numParams+i], hessian[l*numParams+l], v, k, pow(v, k), functionPrecision*optima[i], functionPrecision*optima[l]);}
189
190                 freeParams[i] = optima[i];                              // Reset parameter values
191                 freeParams[l] = optima[l];
192
193                 iOffset = iOffset / v;                                  //  And shrink step
194                 lOffset = lOffset / v;
195         }
196
197         for(int m = 1; m < r; m++) {                                            // Richardson Step
198                 for(int k = 0; k < (r - m); k++) {
199                         if(OMX_DEBUG) {Rprintf("Hessian off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f, and %f).\n", Haprox[k], iOffset, lOffset, functionPrecision, optima[i], optima[l], v, m, pow(4.0, m), functionPrecision*optima[i], functionPrecision*optima[l], k);}
200                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k]) / (pow(4.0, m)-1);
201                 }
202         }
203
204         if(OMX_DEBUG) {Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d and %d) with value %f...", hessian, i, l, i*numParams+l, l*numParams+i, Haprox[0]);}
205         hessian[i*numParams+l] = Haprox[0];
206         hessian[l*numParams+i] = Haprox[0];
207
208 }
209
210 void doHessianCalculation(int numParams, int numChildren, 
211         struct hess_struct *hess_work, omxState* parentState) {
212
213         int i,j;
214
215         omxFitFunction* parent_oo = parentState->fitMatrix->fitFunction;
216         double* parent_gradient = parent_oo->gradient;
217         double* parent_hessian = parent_oo->hessian;
218         double* parent_optima = parentState->optimalValues;
219
220         int numOffDiagonal = (numParams * (numParams - 1)) / 2;
221         int *diags = Calloc(numOffDiagonal, int);
222         int *offDiags = Calloc(numOffDiagonal, int);
223         int offset = 0;
224         // gcc does not detect the usage of the following variable
225         // in the omp parallel pragma, and marks the variable as
226         // unused, so the attribute is placed to silence the warning.
227     int __attribute__((unused)) parallelism = (numChildren == 0) ? 1 : numChildren;
228
229         // There must be a way to avoid constructing the
230         // diags and offDiags arrays and replace them with functions
231         // that produce these values given the input
232         /// {0, 1, ..., numOffDiagonal - 1} -- M. Spiegel
233         for(i = 0; i < numParams; i++) {
234                 for(j = i - 1; j >= 0; j--) {
235                         diags[offset] = i;
236                         offDiags[offset] = j;
237                         offset++;
238                 }
239         }
240
241         #pragma omp parallel for num_threads(parallelism) 
242         for(i = 0; i < numParams; i++) {
243                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
244                 omxEstimateHessianOnDiagonal(i, hess_work + threadId, 
245                         parent_optima, parent_gradient, parent_hessian);
246         }
247
248         #pragma omp parallel for num_threads(parallelism) 
249         for(offset = 0; offset < numOffDiagonal; offset++) {
250                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
251                 omxEstimateHessianOffDiagonal(diags[offset], offDiags[offset],
252                         hess_work + threadId, parent_optima, parent_gradient,
253                         parent_hessian);
254         }
255
256         Free(diags);
257         Free(offDiags);
258
259 }
260
261 /*************************************************************************************
262  *
263  *   omxEstimateHessian
264  *
265  *  author: tbrick, 2010-02-04
266  *
267  *  Based on code in the numDeriv library for R <citation needed> // TODO: Find NumDeriv Citation
268  *
269  *  @params functionPrecision   functional precision for the calculation
270  *  @params r                                   number of repetitions for Richardson approximation
271  *  @params currentState                the current omxState
272  *
273  ************************************************************************************/
274 void omxEstimateHessian(double functionPrecision, int r)
275 {
276         omxState* parentState = globalState;
277
278         // TODO: Check for nonlinear constraints and adjust algorithm accordingly.
279         // TODO: Allow more than one hessian value for calculation
280
281         int numChildren = parentState->numChildren;
282         int numParams = parentState->numFreeParams;
283         int i;
284
285     struct hess_struct* hess_work;
286         if (numChildren < 2) {
287                 hess_work = Calloc(1, struct hess_struct);
288                 omxPopulateHessianWork(hess_work, functionPrecision, r, parentState, parentState);
289         } else {
290                 hess_work = Calloc(numChildren, struct hess_struct);
291                 for(int i = 0; i < numChildren; i++) {
292                         omxPopulateHessianWork(hess_work + i, functionPrecision, r, parentState->childList[i], parentState);
293                 }
294         }
295         if(OMX_DEBUG) Rprintf("Hessian Calculation using %d children\n", numChildren);
296
297         omxFitFunction* parent_oo = parentState->fitMatrix->fitFunction;
298
299         if(parent_oo->hessian == NULL) {
300                 parent_oo->hessian = (double*) R_alloc(numParams * numParams, sizeof(double));
301                 if(OMX_DEBUG) {Rprintf("Generated hessian memory, (%d x %d), at 0x%x.\n", numParams, numParams, parent_oo->hessian);}
302         }
303
304         if(parent_oo->gradient == NULL) {
305                 parent_oo->gradient = (double*) R_alloc(numParams, sizeof(double));
306                 if(OMX_DEBUG) {Rprintf("Generated gradient memory, (%d), at 0x%x.\n", numParams, parent_oo->gradient);}
307         }
308   
309         doHessianCalculation(numParams, numChildren, hess_work, parentState);
310
311         if(OMX_DEBUG) {Rprintf("Hessian Computation complete.\n");}
312
313         if (numChildren < 2) {
314                 Free(hess_work->Haprox);
315                 Free(hess_work->Gaprox);
316                 Free(hess_work->freeParams);
317             Free(hess_work);
318         } else {
319                 for(i = 0; i < numChildren; i++) {
320                         Free((hess_work + i)->Haprox);
321                         Free((hess_work + i)->Gaprox);
322                         Free((hess_work + i)->freeParams);
323                 }
324                 Free(hess_work);
325         }
326 }
327
328 class omxCompute *newComputeEstimateHessian()
329 {
330         return new omxComputeEstimateHessian;
331 }
332
333 void omxComputeEstimateHessian::compute()
334 {
335         int n = globalState->numFreeParams;
336
337         PROTECT(calculatedHessian = allocMatrix(REALSXP, n, n));
338         PROTECT(stdErrors = allocMatrix(REALSXP, n, 1));
339
340         omxEstimateHessian(.0001, 4);
341         if(globalState->calculateStdErrors) {
342                 if(OMX_DEBUG) { Rprintf("Calculating Standard Errors for Fit Function.\n");}
343                 omxFitFunction* oo = globalState->fitMatrix->fitFunction;
344                 omxCalculateStdErrorFromHessian(2.0, oo);
345         }
346
347         omxPopulateHessians(globalState->numHessians, globalState->fitMatrix, 
348                             calculatedHessian, stdErrors, globalState->calculateStdErrors, n);
349 }
350
351 void omxComputeEstimateHessian::reportResults(MxRList *result)
352 {
353         result->push_back(std::make_pair(mkChar("calculatedHessian"), calculatedHessian));
354
355         if (globalState->calculateStdErrors) {
356                 result->push_back(std::make_pair(mkChar("standardErrors"), stdErrors));
357         }
358 }