Add Hessian symmetry correction in ComputeOnce path
[openmx:openmx.git] / src / Compute.cpp
1 /*
2  *  Copyright 2013 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *   Unless required by applicable law or agreed to in writing, software
11  *   distributed under the License is distributed on an "AS IS" BASIS,
12  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 #include "omxDefines.h"
18 #include "Compute.h"
19 #include "omxState.h"
20 #include "omxExportBackendState.h"
21 #include "omxRFitFunction.h"
22
23 std::vector<int> FitContext::markMatrices;
24
25 void FitContext::init()
26 {
27         size_t numParam = varGroup->vars.size();
28         fit = 0;
29         est = new double[numParam];
30         grad = new double[numParam];
31         hess = new double[numParam * numParam];
32 }
33
34 FitContext::FitContext()
35 {
36         parent = NULL;
37         varGroup = Global->freeGroup[0];
38         init();
39
40         size_t numParam = varGroup->vars.size();
41         for (size_t v1=0; v1 < numParam; v1++) {
42                 est[v1] = Global->freeGroup[0]->vars[v1]->start;
43                 grad[v1] = nan("unset");
44                 for (size_t v2=0; v2 < numParam; v2++) {
45                         hess[v1 * numParam + v2] = nan("unset");
46                 }
47         }
48 }
49
50 FitContext::FitContext(FitContext *parent)
51 {
52         varGroup = parent->varGroup;
53         init();
54         fit = parent->fit;
55         size_t numParam = varGroup->vars.size();
56         memcpy(est, parent->est, sizeof(double) * numParam);
57         memcpy(grad, parent->grad, sizeof(double) * numParam);
58         memcpy(hess, parent->hess, sizeof(double) * numParam * numParam);
59 }
60
61 // arg to control what to copy? usually don't want everything TODO
62 FitContext::FitContext(FitContext *parent, FreeVarGroup *varGroup)
63 {
64         this->parent = parent;
65         this->varGroup = varGroup;
66         init();
67
68         FreeVarGroup *src = parent->varGroup;
69         FreeVarGroup *dest = varGroup;
70         size_t svars = parent->varGroup->vars.size();
71         size_t dvars = varGroup->vars.size();
72
73         size_t d1 = 0;
74         for (size_t s1=0; s1 < src->vars.size(); ++s1) {
75                 if (src->vars[s1] != dest->vars[d1]) continue;
76                 est[d1] = parent->est[s1];
77                 grad[d1] = parent->grad[s1];
78
79                 size_t d2 = 0;
80                 for (size_t s2=0; s2 < src->vars.size(); ++s2) {
81                         if (src->vars[s2] != dest->vars[d2]) continue;
82                         hess[d1 * dvars + d2] = parent->hess[s1 * svars + s2];
83                         if (++d2 == dvars) break;
84                 }
85
86                 if (++d1 == dvars) break;
87         }
88         if (d1 != dvars) error("Parent free parameter group is not a superset");
89
90         // pda(parent->est, 1, svars);
91         // pda(est, 1, dvars);
92         // pda(parent->grad, 1, svars);
93         // pda(grad, 1, dvars);
94         // pda(parent->hess, svars, svars);
95         // pda(hess, dvars, dvars);
96 }
97
98 void FitContext::copyParamToModel(omxMatrix *mat)
99 { copyParamToModel(mat->currentState); }
100
101 void FitContext::copyParamToModel(omxMatrix *mat, double *at)
102 { copyParamToModel(mat->currentState, at); }
103
104 void FitContext::updateParentAndFree()
105 {
106         FreeVarGroup *src = varGroup;
107         FreeVarGroup *dest = parent->varGroup;
108         size_t svars = varGroup->vars.size();
109         size_t dvars = parent->varGroup->vars.size();
110
111         size_t s1 = 0;
112         for (size_t d1=0; d1 < dest->vars.size(); ++d1) {
113                 if (dest->vars[d1] != src->vars[s1]) continue;
114                 parent->est[d1] = est[s1];
115                 parent->grad[d1] = grad[s1];
116
117                 size_t s2 = 0;
118                 for (size_t d2=0; d2 < dest->vars.size(); ++d2) {
119                         if (dest->vars[d2] != src->vars[s2]) continue;
120                         parent->hess[d1 * dvars + d2] = hess[s1 * svars + s2];
121                         if (++s2 == svars) break;
122                 }
123
124                 if (++s1 == svars) break;
125         }
126         
127         // pda(est, 1, svars);
128         // pda(parent->est, 1, dvars);
129         // pda(grad, 1, svars);
130         // pda(parent->grad, 1, dvars);
131         // pda(hess, svars, svars);
132         // pda(parent->hess, dvars, dvars);
133
134         delete this;
135 }
136
137 void FitContext::log(int what)
138 {
139         size_t count = varGroup->vars.size();
140         std::string buf;
141         if (what & FF_COMPUTE_FIT) buf += string_snprintf("fit: %.3f\n", fit);
142         if (what & FF_COMPUTE_ESTIMATE) {
143                 buf += "est: c(";
144                 for (size_t vx=0; vx < count; ++vx) {
145                         buf += string_snprintf("%.3f", est[vx]);
146                         if (vx < count - 1) buf += ", ";
147                 }
148                 buf += ")\n";
149         }
150         if (what & FF_COMPUTE_GRADIENT) {
151                 buf += "grad: c(";
152                 for (size_t vx=0; vx < count; ++vx) {
153                         buf += string_snprintf("%.3f", grad[vx]);
154                         if (vx < count - 1) buf += ", ";
155                 }
156                 buf += ")\n";
157         }
158         if (what & FF_COMPUTE_HESSIAN) {
159                 buf += "hess: c(";
160                 for (size_t v1=0; v1 < count; ++v1) {
161                         for (size_t v2=0; v2 < count; ++v2) {
162                                 buf += string_snprintf("%.3f", hess[v1 * count + v2]);
163                                 if (v1 < count-1 || v2 < count-1) buf += ", ";
164                         }
165                         buf += "\n";
166                 }
167                 buf += ")\n";
168         }
169         mxLogBig(buf);
170 }
171
172 void FitContext::cacheFreeVarDependencies()
173 {
174         omxState *os = globalState;
175         size_t numMats = os->matrixList.size();
176         size_t numAlgs = os->algebraList.size();
177
178         markMatrices.clear();
179         markMatrices.resize(numMats + numAlgs, 0);
180
181         // More efficient to use the appropriate group instead of the default group. TODO
182         FreeVarGroup *varGroup = Global->freeGroup[0];
183         for (size_t freeVarIndex = 0; freeVarIndex < varGroup->vars.size(); freeVarIndex++) {
184                 omxFreeVar* freeVar = varGroup->vars[freeVarIndex];
185                 int *deps   = freeVar->deps;
186                 int numDeps = freeVar->numDeps;
187                 for (int index = 0; index < numDeps; index++) {
188                         markMatrices[deps[index] + numMats] = 1;
189                 }
190         }
191 }
192
193 void FitContext::fixHessianSymmetry()
194 {
195         // make non-symmetric entries symmetric, if possible
196         size_t numParam = varGroup->vars.size();
197         for (size_t h1=1; h1 < numParam; h1++) {
198                 for (size_t h2=0; h2 < h1; h2++) {
199                         double upper = hess[h1 * numParam + h2];
200                         double lower = hess[h2 * numParam + h1];
201                         if (isfinite(upper)) continue;
202                         if (isfinite(lower)) {
203                                 hess[h1 * numParam + h2] = lower;
204                         } else {
205                                 error("Hessian is not finite at [%d,%d]", h1,h2);
206                         }
207                 }
208         }
209 }
210
211 static void omxRepopulateRFitFunction(omxFitFunction* oo, double* x, int n)
212 {
213         omxRFitFunction* rFitFunction = (omxRFitFunction*)oo->argStruct;
214
215         SEXP theCall, estimate;
216
217         PROTECT(estimate = allocVector(REALSXP, n));
218         double *est = REAL(estimate);
219         for(int i = 0; i < n ; i++) {
220                 est[i] = x[i];
221         }
222
223         PROTECT(theCall = allocVector(LANGSXP, 4));
224
225         SETCAR(theCall, install("imxUpdateModelValues"));
226         SETCADR(theCall, rFitFunction->model);
227         SETCADDR(theCall, rFitFunction->flatModel);
228         SETCADDDR(theCall, estimate);
229
230         REPROTECT(rFitFunction->model = eval(theCall, R_GlobalEnv), rFitFunction->modelIndex);
231
232         UNPROTECT(2); // theCall, estimate
233 }
234
235 void FitContext::copyParamToModel(omxState* os)
236 {
237         copyParamToModel(os, est);
238 }
239
240 void FitContext::copyParamToModel(omxState* os, double *at)
241 {
242         size_t numParam = varGroup->vars.size();
243         if(OMX_DEBUG) {
244                 mxLog("Copying %d free parameter estimates to model %p", numParam, os);
245         }
246
247         if(numParam == 0) return;
248
249         size_t numMats = os->matrixList.size();
250         size_t numAlgs = os->algebraList.size();
251
252         os->computeCount++;
253
254         if(OMX_VERBOSE) {
255                 std::string buf;
256                 buf += string_snprintf("Call: %d.%d (%d) ", os->majorIteration, os->minorIteration, os->computeCount);
257                 buf += ("Estimates: [");
258                 for(size_t k = 0; k < numParam; k++) {
259                         buf += string_snprintf(" %f", at[k]);
260                 }
261                 buf += ("]\n");
262                 mxLogBig(buf);
263         }
264
265         for(size_t k = 0; k < numParam; k++) {
266                 omxFreeVar* freeVar = varGroup->vars[k];
267                 for(size_t l = 0; l < freeVar->locations.size(); l++) {
268                         omxFreeVarLocation *loc = &freeVar->locations[l];
269                         omxMatrix *matrix = os->matrixList[loc->matrix];
270                         int row = loc->row;
271                         int col = loc->col;
272                         omxSetMatrixElement(matrix, row, col, at[k]);
273                         if(OMX_DEBUG) {
274                                 mxLog("Setting location (%d, %d) of matrix %d to value %f for var %d",
275                                         row, col, loc->matrix, at[k], k);
276                         }
277                 }
278         }
279
280         if (RFitFunction) omxRepopulateRFitFunction(RFitFunction, at, numParam);
281
282         for(size_t i = 0; i < numMats; i++) {
283                 if (markMatrices[i]) {
284                         int offset = ~(i - numMats);
285                         omxMarkDirty(os->matrixList[offset]);
286                 }
287         }
288
289         for(size_t i = 0; i < numAlgs; i++) {
290                 if (markMatrices[i + numMats]) {
291                         omxMarkDirty(os->algebraList[i]);
292                 }
293         }
294
295         if (!os->childList) return;
296
297         for(int i = 0; i < Global->numChildren; i++) {
298                 copyParamToModel(os->childList[i]);
299         }
300 }
301
302 FitContext::~FitContext()
303 {
304         delete [] est;
305         delete [] grad;
306         delete [] hess;
307 }
308
309 omxFitFunction *FitContext::RFitFunction = NULL;
310
311 void FitContext::setRFitFunction(omxFitFunction *rff)
312 {
313         if (rff) {
314                 Global->numThreads = 1;
315                 if (RFitFunction) {
316                         error("You can only create 1 MxRFitFunction per independent model");
317                 }
318         }
319         RFitFunction = rff;
320 }
321
322 omxCompute::~omxCompute()
323 {}
324
325 void omxComputeOperation::initFromFrontend(SEXP rObj)
326 {
327         SEXP slotValue;
328         PROTECT(slotValue = GET_SLOT(rObj, install("free.group")));
329         int paramGroup = INTEGER(slotValue)[0];
330         varGroup = Global->freeGroup[paramGroup];
331 }
332
333 class omxComputeSequence : public omxCompute {
334         std::vector< omxCompute* > clist;
335
336  public:
337         virtual void initFromFrontend(SEXP rObj);
338         virtual void compute(FitContext *fc);
339         virtual void reportResults(FitContext *fc, MxRList *out);
340         virtual double getOptimizerStatus();
341         virtual ~omxComputeSequence();
342 };
343
344 class omxComputeIterate : public omxCompute {
345         std::vector< omxCompute* > clist;
346         int maxIter;
347         double tolerance;
348         bool verbose;
349
350  public:
351         virtual void initFromFrontend(SEXP rObj);
352         virtual void compute(FitContext *fc);
353         virtual void reportResults(FitContext *fc, MxRList *out);
354         virtual double getOptimizerStatus();
355         virtual ~omxComputeIterate();
356 };
357
358 class omxComputeOnce : public omxComputeOperation {
359         typedef omxComputeOperation super;
360         std::vector< omxMatrix* > algebras;
361         std::vector< omxExpectation* > expectations;
362         const char *context;
363         bool gradient;
364         bool hessian;
365
366  public:
367         virtual void initFromFrontend(SEXP rObj);
368         virtual void compute(FitContext *fc);
369         virtual void reportResults(FitContext *fc, MxRList *out);
370 };
371
372 static class omxCompute *newComputeSequence()
373 { return new omxComputeSequence(); }
374
375 static class omxCompute *newComputeIterate()
376 { return new omxComputeIterate(); }
377
378 static class omxCompute *newComputeOnce()
379 { return new omxComputeOnce(); }
380
381 struct omxComputeTableEntry {
382         char name[32];
383         omxCompute *(*ctor)();
384 };
385
386 static const struct omxComputeTableEntry omxComputeTable[] = {
387         {"MxComputeEstimatedHessian", &newComputeEstimatedHessian},
388         {"MxComputeGradientDescent", &newComputeGradientDescent},
389         {"MxComputeSequence", &newComputeSequence },
390         {"MxComputeIterate", &newComputeIterate },
391         {"MxComputeOnce", &newComputeOnce },
392         {"MxComputeNewtonRaphson", &newComputeNewtonRaphson},
393 };
394
395 omxCompute *omxNewCompute(omxState* os, const char *type)
396 {
397         omxCompute *got = NULL;
398
399         for (size_t fx=0; fx < OMX_STATIC_ARRAY_SIZE(omxComputeTable); fx++) {
400                 const struct omxComputeTableEntry *entry = omxComputeTable + fx;
401                 if(strcmp(type, entry->name) == 0) {
402                         got = entry->ctor();
403                         break;
404                 }
405         }
406
407         if (!got) error("Compute %s is not implemented", type);
408
409         return got;
410 }
411
412 void omxComputeSequence::initFromFrontend(SEXP rObj)
413 {
414         SEXP slotValue;
415         PROTECT(slotValue = GET_SLOT(rObj, install("steps")));
416
417         for (int cx = 0; cx < length(slotValue); cx++) {
418                 SEXP step = VECTOR_ELT(slotValue, cx);
419                 SEXP s4class;
420                 PROTECT(s4class = STRING_ELT(getAttrib(step, install("class")), 0));
421                 omxCompute *compute = omxNewCompute(globalState, CHAR(s4class));
422                 compute->initFromFrontend(step);
423                 if (isErrorRaised(globalState)) break;
424                 clist.push_back(compute);
425         }
426 }
427
428 void omxComputeSequence::compute(FitContext *fc)
429 {
430         for (size_t cx=0; cx < clist.size(); ++cx) {
431                 FitContext *context = fc;
432                 if (fc->varGroup != clist[cx]->varGroup) {
433                         context = new FitContext(fc, clist[cx]->varGroup);
434                 }
435                 clist[cx]->compute(context);
436                 if (context != fc) context->updateParentAndFree();
437                 if (isErrorRaised(globalState)) break;
438         }
439 }
440
441 void omxComputeSequence::reportResults(FitContext *fc, MxRList *out)
442 {
443         // put this stuff in a new list?
444         // merge with Iterate TODO
445         for (size_t cx=0; cx < clist.size(); ++cx) {
446                 FitContext *context = fc;
447                 if (fc->varGroup != clist[cx]->varGroup) {
448                         context = new FitContext(fc, clist[cx]->varGroup);
449                 }
450                 clist[cx]->reportResults(context, out);
451                 if (context != fc) context->updateParentAndFree();
452                 if (isErrorRaised(globalState)) break;
453         }
454 }
455
456 double omxComputeSequence::getOptimizerStatus()
457 {
458         // for backward compatibility, not indended to work generally
459         for (size_t cx=0; cx < clist.size(); ++cx) {
460                 double got = clist[cx]->getOptimizerStatus();
461                 if (got != NA_REAL) return got;
462         }
463         return NA_REAL;
464 }
465
466 omxComputeSequence::~omxComputeSequence()
467 {
468         for (size_t cx=0; cx < clist.size(); ++cx) {
469                 delete clist[cx];
470         }
471 }
472
473 void omxComputeIterate::initFromFrontend(SEXP rObj)
474 {
475         SEXP slotValue;
476
477         PROTECT(slotValue = GET_SLOT(rObj, install("maxIter")));
478         maxIter = INTEGER(slotValue)[0];
479
480         PROTECT(slotValue = GET_SLOT(rObj, install("tolerance")));
481         tolerance = REAL(slotValue)[0];
482         if (tolerance <= 0) error("tolerance must be positive");
483
484         PROTECT(slotValue = GET_SLOT(rObj, install("steps")));
485
486         for (int cx = 0; cx < length(slotValue); cx++) {
487                 SEXP step = VECTOR_ELT(slotValue, cx);
488                 SEXP s4class;
489                 PROTECT(s4class = STRING_ELT(getAttrib(step, install("class")), 0));
490                 omxCompute *compute = omxNewCompute(globalState, CHAR(s4class));
491                 compute->initFromFrontend(step);
492                 if (isErrorRaised(globalState)) break;
493                 clist.push_back(compute);
494         }
495
496         PROTECT(slotValue = GET_SLOT(rObj, install("verbose")));
497         verbose = asLogical(slotValue);
498 }
499
500 void omxComputeIterate::compute(FitContext *fc)
501 {
502         int iter = 0;
503         double prevFit = 0;
504         double change = tolerance * 10;
505         while (1) {
506                 for (size_t cx=0; cx < clist.size(); ++cx) {
507                         FitContext *context = fc;
508                         if (fc->varGroup != clist[cx]->varGroup) {
509                                 context = new FitContext(fc, clist[cx]->varGroup);
510                         }
511                         clist[cx]->compute(context);
512                         if (context != fc) context->updateParentAndFree();
513                         if (isErrorRaised(globalState)) break;
514                 }
515                 if (prevFit != 0) {
516                         change = prevFit - fc->fit;
517                         if (verbose) mxLog("fit %.9g change %.9g", fc->fit, change);
518                 }
519                 prevFit = fc->fit;
520                 if (isErrorRaised(globalState) || ++iter > maxIter || fabs(change) < tolerance) break;
521         }
522 }
523
524 void omxComputeIterate::reportResults(FitContext *fc, MxRList *out)
525 {
526         for (size_t cx=0; cx < clist.size(); ++cx) {
527                 FitContext *context = fc;
528                 if (fc->varGroup != clist[cx]->varGroup) {
529                         context = new FitContext(fc, clist[cx]->varGroup);
530                 }
531                 clist[cx]->reportResults(context, out);
532                 if (context != fc) context->updateParentAndFree();
533                 if (isErrorRaised(globalState)) break;
534         }
535 }
536
537 double omxComputeIterate::getOptimizerStatus()
538 {
539         // for backward compatibility, not indended to work generally
540         for (size_t cx=0; cx < clist.size(); ++cx) {
541                 double got = clist[cx]->getOptimizerStatus();
542                 if (got != NA_REAL) return got;
543         }
544         return NA_REAL;
545 }
546
547 omxComputeIterate::~omxComputeIterate()
548 {
549         for (size_t cx=0; cx < clist.size(); ++cx) {
550                 delete clist[cx];
551         }
552 }
553
554 void omxComputeOnce::initFromFrontend(SEXP rObj)
555 {
556         super::initFromFrontend(rObj);
557
558         SEXP slotValue;
559         PROTECT(slotValue = GET_SLOT(rObj, install("what")));
560         for (int wx=0; wx < length(slotValue); ++wx) {
561                 int objNum = INTEGER(slotValue)[wx];
562                 if (objNum >= 0) {
563                         omxMatrix *algebra = globalState->algebraList[objNum];
564                         if (algebra->fitFunction) {
565                                 setFreeVarGroup(algebra->fitFunction, varGroup);
566                                 omxCompleteFitFunction(algebra);
567                         }
568                         algebras.push_back(algebra);
569                 } else {
570                         omxExpectation *expectation = globalState->expectationList[~objNum];
571                         setFreeVarGroup(expectation, varGroup);
572                         omxCompleteExpectation(expectation);
573                         expectations.push_back(expectation);
574                 }
575         }
576
577         PROTECT(slotValue = GET_SLOT(rObj, install("context")));
578         if (length(slotValue) == 0) {
579                 // OK
580         } else if (length(slotValue) == 1) {
581                 SEXP elem;
582                 PROTECT(elem = STRING_ELT(slotValue, 0));
583                 context = CHAR(elem);
584         }
585
586         PROTECT(slotValue = GET_SLOT(rObj, install("gradient")));
587         gradient = asLogical(slotValue);
588
589         PROTECT(slotValue = GET_SLOT(rObj, install("hessian")));
590         hessian = asLogical(slotValue);
591 }
592
593 void omxComputeOnce::compute(FitContext *fc)
594 {
595         if (algebras.size()) {
596                 int want = FF_COMPUTE_FIT;
597                 if (gradient) want |= FF_COMPUTE_GRADIENT;
598                 if (hessian)  want |= FF_COMPUTE_HESSIAN;
599
600                 for (size_t wx=0; wx < algebras.size(); ++wx) {
601                         omxMatrix *algebra = algebras[wx];
602                         if (algebra->fitFunction) {
603                                 omxFitFunctionCompute(algebra->fitFunction, want, fc);
604                                 fc->fit = algebra->data[0];
605                                 if (hessian) fc->fixHessianSymmetry();
606                         } else {
607                                 omxForceCompute(algebra);
608                         }
609                 }
610         } else if (expectations.size()) {
611                 for (size_t wx=0; wx < expectations.size(); ++wx) {
612                         omxExpectation *expectation = expectations[wx];
613                         omxExpectationCompute(expectation, context);
614                 }
615         }
616 }
617
618 void omxComputeOnce::reportResults(FitContext *fc, MxRList *out)
619 {
620         if (algebras.size()==0 || algebras[0]->fitFunction == NULL) return;
621
622         omxMatrix *algebra = algebras[0];
623
624         omxPopulateFitFunction(algebra, out);
625
626         out->push_back(std::make_pair(mkChar("minimum"), ScalarReal(fc->fit)));
627
628         size_t numFree = fc->varGroup->vars.size();
629         if (numFree) {
630                 SEXP estimate;
631                 PROTECT(estimate = allocVector(REALSXP, numFree));
632                 memcpy(REAL(estimate), fc->est, sizeof(double)*numFree);
633                 out->push_back(std::make_pair(mkChar("estimate"), estimate));
634
635                 if (gradient) {
636                         SEXP Rgradient;
637                         PROTECT(Rgradient = allocVector(REALSXP, numFree));
638                         memcpy(REAL(Rgradient), fc->grad, sizeof(double) * numFree);
639                         out->push_back(std::make_pair(mkChar("gradient"), Rgradient));
640                 }
641
642                 if (hessian) {
643                         SEXP Rhessian;
644                         PROTECT(Rhessian = allocMatrix(REALSXP, numFree, numFree));
645                         memcpy(REAL(Rhessian), fc->hess, sizeof(double) * numFree * numFree);
646                         out->push_back(std::make_pair(mkChar("hessian"), Rhessian));
647                 }
648         }
649 }