Revert "MxFitFunctionMultigroup"
[openmx:openmx.git] / src / omxHessianCalculation.cpp
1 /*
2  *  Copyright 2007-2013 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *   Unless required by applicable law or agreed to in writing, software
11  *   distributed under the License is distributed on an "AS IS" BASIS,
12  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 #include "R.h"
18 #include <Rinternals.h>
19 #include <Rdefines.h>
20 #include <R_ext/Rdynload.h>
21 #include <R_ext/BLAS.h>
22 #include <R_ext/Lapack.h>
23 #include "omxDefines.h"
24 #include "npsolWrap.h"
25
26 #include <stdio.h>
27 #include <sys/types.h>
28 #include <errno.h>
29 #include "omxState.h"
30 #include "omxMatrix.h"
31 #include "omxAlgebra.h"
32 #include "omxFitFunction.h"
33 #include "omxNPSOLSpecific.h"
34 #include "omxOptimizer.h"
35 #include "omxOpenmpWrap.h"
36
37 struct hess_struct {
38         int     numParams;
39         double* freeParams;
40         double* Haprox;
41         double* Gaprox;
42         omxMatrix* fitMatrix;
43         double  f0;
44         double  functionPrecision;
45         int r;
46 };
47
48 void omxPopulateHessianWork(struct hess_struct *hess_work, 
49         double functionPrecision, int r, omxState* state,
50         omxState *parentState) {
51
52         omxFitFunction* oo = state->fitMatrix->fitFunction;
53         int numParams = state->numFreeParams;
54
55         double* optima = parentState->optimalValues;
56         double *freeParams = (double*) Calloc(numParams, double);
57
58         hess_work->numParams = numParams;
59         hess_work->functionPrecision = functionPrecision;
60         hess_work->r = r;
61         hess_work->Haprox = (double*) Calloc(r, double);                // Hessian Workspace
62         hess_work->Gaprox = (double*) Calloc(r, double);                // Gradient Workspace
63         hess_work->freeParams = freeParams;
64         for(int i = 0; i < numParams; i++) {
65                 freeParams[i] = optima[i];
66         }
67
68         omxMatrix *fitMatrix = oo->matrix;
69         hess_work->fitMatrix = fitMatrix;
70
71         if (fitMatrix->fitFunction->repopulateFun != NULL) {    //  Just in case
72                 fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
73         } else {
74                 handleFreeVarList(state, freeParams, numParams);
75         }
76         omxRecompute(fitMatrix);                // Initial recompute in case it matters.        
77         hess_work->f0 = omxMatrixElement(fitMatrix, 0, 0);
78 }
79
80 /**
81   @params i              parameter number
82   @params hess_work      local copy
83   @params optima         shared read-only variable
84   @params gradient       shared write-only variable
85   @params hessian        shared write-only variable
86  */
87 void omxEstimateHessianOnDiagonal(int i, struct hess_struct* hess_work, 
88         double *optima, double *gradient, double *hessian) {
89
90         static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
91         static const double eps = 1E-4; // Kept here for access purposes.
92
93         int     numParams          = hess_work->numParams;         // read-only
94         double *Haprox             = hess_work->Haprox;
95         double *Gaprox             = hess_work->Gaprox;
96         double *freeParams         = hess_work->freeParams;
97         omxMatrix* fitMatrix = hess_work->fitMatrix; 
98         double functionPrecision   = hess_work->functionPrecision; // read-only
99         double f0                  = hess_work->f0;                // read-only
100         int    r                   = hess_work->r;                 // read-only
101     omxState *currentState     = fitMatrix->currentState;
102
103
104         /* Part the first: Gradient and diagonal */
105         double iOffset = fabs(functionPrecision * optima[i]);
106         if(fabs(iOffset) < eps) iOffset += eps;
107         if(OMX_DEBUG) {Rprintf("Hessian estimation: iOffset: %f.\n", iOffset);}
108         for(int k = 0; k < r; k++) {                    // Decreasing step size, starting at k == 0
109                 if(OMX_DEBUG) {Rprintf("Hessian estimation: Parameter %d at refinement level %d (%f). One Step Forward.\n", i, k, iOffset);}
110                 freeParams[i] = optima[i] + iOffset;
111                 if (fitMatrix->fitFunction->repopulateFun != NULL) {    //  Just in case
112                         fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
113                 } else {
114                         handleFreeVarList(currentState, freeParams, numParams);
115                 }
116                 omxRecompute(fitMatrix);
117                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
118
119                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
120
121                 freeParams[i] = optima[i] - iOffset;
122                 if (fitMatrix->fitFunction->repopulateFun != NULL) {    // Just in case
123                         fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
124                 } else {
125                         handleFreeVarList(currentState, freeParams, numParams);
126                 }
127                 omxRecompute(fitMatrix);
128                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
129
130                 Gaprox[k] = (f1 - f2) / (2.0*iOffset);                                          // This is for the gradient
131                 Haprox[k] = (f1 - 2.0 * f0 + f2) / (iOffset * iOffset);         // This is second derivative
132                 freeParams[i] = optima[i];                                                                      // Reset parameter value
133                 iOffset /= v;
134                 if(OMX_DEBUG) {Rprintf("Hessian estimation: (%d, %d)--Calculating F1: %f F2: %f, Haprox: %f...\n", i, i, f1, f2, Haprox[k]);}
135         }
136
137         for(int m = 1; m < r; m++) {                                            // Richardson Step
138                 for(int k = 0; k < (r - m); k++) {
139                         Gaprox[k] = (Gaprox[k+1] * pow(4.0, m) - Gaprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
140                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k])/(pow(4.0, m)-1); // NumDeriv Hard-wires 4s for r here. Why?
141                 }
142         }
143
144         if(OMX_DEBUG) { Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d) with value %f...\n", hessian, i, i, i*numParams+i, Haprox[0]); }
145         gradient[i] = Gaprox[0];                                                // NPSOL reports a gradient that's fine.  Why report two?
146         hessian[i*numParams + i] = Haprox[0];
147
148         if(OMX_DEBUG) {Rprintf("Done with parameter %d.\n", i);}
149
150 }
151
152 void omxEstimateHessianOffDiagonal(int i, int l, struct hess_struct* hess_work, 
153         double *optima, double *gradient, double *hessian) {
154
155     static const double v = 2.0; //Note: NumDeriv comments that this could be a parameter, but is hard-coded in the algorithm
156     static const double eps = 1E-4; // Kept here for access purposes.
157
158         int     numParams          = hess_work->numParams;         // read-only
159         double *Haprox             = hess_work->Haprox;
160         double *freeParams         = hess_work->freeParams;
161         omxMatrix* fitMatrix = hess_work->fitMatrix; 
162         double functionPrecision   = hess_work->functionPrecision; // read-only
163         double f0                  = hess_work->f0;                // read-only
164         int    r                   = hess_work->r;                 // read-only
165     omxState *currentState     = fitMatrix->currentState;
166
167         double iOffset = fabs(functionPrecision*optima[i]);
168         if(fabs(iOffset) < eps) iOffset += eps;
169         double lOffset = fabs(functionPrecision*optima[l]);
170         if(fabs(lOffset) < eps) lOffset += eps;
171
172         for(int k = 0; k < r; k++) {
173                 freeParams[i] = optima[i] + iOffset;
174                 freeParams[l] = optima[l] + lOffset;
175                 if (fitMatrix->fitFunction->repopulateFun != NULL) {    //  Just in case
176                         fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
177                 } else {
178                         handleFreeVarList(currentState, freeParams, numParams);
179                 }
180                 omxRecompute(fitMatrix);
181                 double f1 = omxMatrixElement(fitMatrix, 0, 0);
182
183                 if(OMX_DEBUG) {Rprintf("Hessian estimation: One Step Back.\n");}
184
185                 freeParams[i] = optima[i] - iOffset;
186                 freeParams[l] = optima[l] - lOffset;
187                 if (fitMatrix->fitFunction->repopulateFun != NULL) {    // Just in case
188                         fitMatrix->fitFunction->repopulateFun(fitMatrix->fitFunction, freeParams, numParams);
189                 } else {
190                         handleFreeVarList(currentState, freeParams, numParams);
191                 }
192                 omxRecompute(fitMatrix);
193                 double f2 = omxMatrixElement(fitMatrix, 0, 0);
194
195                 Haprox[k] = (f1 - 2.0 * f0 + f2 - hessian[i*numParams+i]*iOffset*iOffset -
196                                                 hessian[l*numParams+l]*lOffset*lOffset)/(2.0*iOffset*lOffset);
197                 if(OMX_DEBUG) {Rprintf("Hessian first off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f).\n", Haprox[k], iOffset, lOffset, f1, hessian[i*numParams+i], hessian[l*numParams+l], v, k, pow(v, k), functionPrecision*optima[i], functionPrecision*optima[l]);}
198
199                 freeParams[i] = optima[i];                              // Reset parameter values
200                 freeParams[l] = optima[l];
201
202                 iOffset = iOffset / v;                                  //  And shrink step
203                 lOffset = lOffset / v;
204         }
205
206         for(int m = 1; m < r; m++) {                                            // Richardson Step
207                 for(int k = 0; k < (r - m); k++) {
208                         if(OMX_DEBUG) {Rprintf("Hessian off-diagonal calculation: Haprox = %f, iOffset = %f, lOffset=%f from params %f, %f and %f, %f and %d (also: %f, %f and %f, and %f).\n", Haprox[k], iOffset, lOffset, functionPrecision, optima[i], optima[l], v, m, pow(4.0, m), functionPrecision*optima[i], functionPrecision*optima[l], k);}
209                         Haprox[k] = (Haprox[k+1] * pow(4.0, m) - Haprox[k]) / (pow(4.0, m)-1);
210                 }
211         }
212
213         if(OMX_DEBUG) {Rprintf("Hessian estimation: Populating Hessian (0x%x) at ([%d, %d] = %d and %d) with value %f...", hessian, i, l, i*numParams+l, l*numParams+i, Haprox[0]);}
214         hessian[i*numParams+l] = Haprox[0];
215         hessian[l*numParams+i] = Haprox[0];
216
217 }
218
219 void doHessianCalculation(int numParams, int numChildren, 
220         struct hess_struct *hess_work, omxState* parentState) {
221
222         int i,j;
223
224         omxFitFunction* parent_oo = parentState->fitMatrix->fitFunction;
225         double* parent_gradient = parent_oo->gradient;
226         double* parent_hessian = parent_oo->hessian;
227         double* parent_optima = parentState->optimalValues;
228
229         int numOffDiagonal = (numParams * (numParams - 1)) / 2;
230         int *diags = Calloc(numOffDiagonal, int);
231         int *offDiags = Calloc(numOffDiagonal, int);
232         int offset = 0;
233         // gcc does not detect the usage of the following variable
234         // in the omp parallel pragma, and marks the variable as
235         // unused, so the attribute is placed to silence the warning.
236     int __attribute__((unused)) parallelism = (numChildren == 0) ? 1 : numChildren;
237
238         // There must be a way to avoid constructing the
239         // diags and offDiags arrays and replace them with functions
240         // that produce these values given the input
241         /// {0, 1, ..., numOffDiagonal - 1} -- M. Spiegel
242         for(i = 0; i < numParams; i++) {
243                 for(j = i - 1; j >= 0; j--) {
244                         diags[offset] = i;
245                         offDiags[offset] = j;
246                         offset++;
247                 }
248         }
249
250         #pragma omp parallel for num_threads(parallelism) 
251         for(i = 0; i < numParams; i++) {
252                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
253                 omxEstimateHessianOnDiagonal(i, hess_work + threadId, 
254                         parent_optima, parent_gradient, parent_hessian);
255         }
256
257         #pragma omp parallel for num_threads(parallelism) 
258         for(offset = 0; offset < numOffDiagonal; offset++) {
259                 int threadId = (numChildren < 2) ? 0 : omx_absolute_thread_num();
260                 omxEstimateHessianOffDiagonal(diags[offset], offDiags[offset],
261                         hess_work + threadId, parent_optima, parent_gradient,
262                         parent_hessian);
263         }
264
265         Free(diags);
266         Free(offDiags);
267
268 }
269
270 /*************************************************************************************
271  *
272  *   omxEstimateHessian
273  *
274  *  author: tbrick, 2010-02-04
275  *
276  *  Based on code in the numDeriv library for R <citation needed> // TODO: Find NumDeriv Citation
277  *
278  *  @params functionPrecision   functional precision for the calculation
279  *  @params r                                   number of repetitions for Richardson approximation
280  *  @params currentState                the current omxState
281  *
282  ************************************************************************************/
283 unsigned short omxEstimateHessian(int numHessians, double functionPrecision, int r, omxState* parentState) {
284
285         // TODO: Check for nonlinear constraints and adjust algorithm accordingly.
286         // TODO: Allow more than one hessian value for calculation
287
288         if(numHessians > 1) {
289                 error("NYI: Cannot yet calculate more than a single hessian per optimization.\n");
290         }
291
292         if(numHessians == 0) return FALSE;
293         int numChildren = parentState->numChildren;
294         int numParams = parentState->numFreeParams;
295         int i;
296
297     struct hess_struct* hess_work;
298         if (numChildren < 2) {
299                 hess_work = Calloc(1, struct hess_struct);
300                 omxPopulateHessianWork(hess_work, functionPrecision, r, parentState, parentState);
301         } else {
302                 hess_work = Calloc(numChildren, struct hess_struct);
303                 for(int i = 0; i < numChildren; i++) {
304                         omxPopulateHessianWork(hess_work + i, functionPrecision, r, parentState->childList[i], parentState);
305                 }
306         }
307         if(OMX_DEBUG) Rprintf("Hessian Calculation using %d children\n", numChildren);
308
309         omxFitFunction* parent_oo = parentState->fitMatrix->fitFunction;
310
311         if(parent_oo->hessian == NULL) {
312                 parent_oo->hessian = (double*) R_alloc(numParams * numParams, sizeof(double));
313                 if(OMX_DEBUG) {Rprintf("Generated hessian memory, (%d x %d), at 0x%x.\n", numParams, numParams, parent_oo->hessian);}
314         }
315
316         if(parent_oo->gradient == NULL) {
317                 parent_oo->gradient = (double*) R_alloc(numParams, sizeof(double));
318                 if(OMX_DEBUG) {Rprintf("Generated gradient memory, (%d), at 0x%x.\n", numParams, parent_oo->gradient);}
319         }
320   
321         doHessianCalculation(numParams, numChildren, hess_work, parentState);
322
323         if(OMX_DEBUG) {Rprintf("Hessian Computation complete.\n");}
324
325         if (numChildren < 2) {
326                 Free(hess_work->Haprox);
327                 Free(hess_work->Gaprox);
328                 Free(hess_work->freeParams);
329             Free(hess_work);
330         } else {
331                 for(i = 0; i < numChildren; i++) {
332                         Free((hess_work + i)->Haprox);
333                         Free((hess_work + i)->Gaprox);
334                         Free((hess_work + i)->freeParams);
335                 }
336                 Free(hess_work);
337         }
338         return TRUE;
339
340 }