Take abs(ev) when inverting Hessian (suggested by Timo von Oertzen)
[openmx:openmx.git] / src / Compute.cpp
1 /*
2  *  Copyright 2013 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *   Unless required by applicable law or agreed to in writing, software
11  *   distributed under the License is distributed on an "AS IS" BASIS,
12  *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 #include <algorithm>
18 #include <stdarg.h>
19
20 //#include <iostream>
21
22 #include "Compute.h"
23 #include "omxState.h"
24 #include "omxExportBackendState.h"
25 #include "omxRFitFunction.h"
26 #include "matrix.h"
27 #include "omxBuffer.h"
28 #include "omxState.h"
29
30 void pda(const double *ar, int rows, int cols);
31
32 void FitContext::queue(HessianBlock *hb)
33 {
34         if (hb->vars.size() == 0) {
35                 delete hb;
36                 return;
37         }
38
39         minBlockSize = std::max(int(hb->vars.size()), minBlockSize);
40         allBlocks.push_back(hb);
41 }
42
43 void FitContext::analyzeHessianBlock(HessianBlock *hb)
44 {
45         for (size_t vx=0; vx < hb->vars.size(); ++vx) {
46                 HessianBlock *hb2 = blockByVar[ hb->vars[vx] ];
47                 if (!hb2) continue;
48
49                 for (size_t vx2=0; vx2 < hb2->vars.size(); ++vx2) {
50                         blockByVar[ hb2->vars[vx2] ] = NULL;
51                 }
52                 estNonZero -= hb2->estNonZero();
53
54                 if (hb->vars.size() < hb2->vars.size()) std::swap(hb, hb2);
55
56                 // std::cout << "hb: ";
57                 // for (int i : hb->vars) std::cout << i << ' ';
58                 // std::cout << "\nhb2: ";
59                 // for (int i : hb2->vars) std::cout << i << ' ';
60                 // std::cout << "\n";
61                 bool inc = std::includes(hb->vars.begin(), hb->vars.end(), hb2->vars.begin(), hb2->vars.end());
62                 //std::cout << "includes " << inc << "\n";
63                 if (inc) {
64                         hb->subBlocks.push_back(hb2);
65                 } else {
66                         HessianBlock *parent = new HessianBlock;
67                         mergeBlocks.push_back(parent);
68                         parent->merge = true;
69                         parent->vars = hb->vars;
70                         parent->vars.insert(parent->vars.end(), hb2->vars.begin(), hb2->vars.end());
71                         std::inplace_merge(parent->vars.begin(), parent->vars.begin() + hb->vars.size(), parent->vars.end());
72                         int nsize = std::unique(parent->vars.begin(), parent->vars.end()) - parent->vars.begin();
73                         parent->vars.resize(nsize);
74                         parent->mat.resize(nsize, nsize);
75                         parent->mat.triangularView<Eigen::Upper>().setZero();
76                         parent->subBlocks.push_back(hb);
77                         parent->subBlocks.push_back(hb2);
78                         analyzeHessianBlock(parent);
79                         return;
80                 }
81         }
82
83         for (size_t vx=0; vx < hb->vars.size(); ++vx) {
84                 blockByVar[ hb->vars[vx] ] = hb;
85         }
86         estNonZero += hb->estNonZero();
87         maxBlockSize = std::max(int(hb->vars.size()), maxBlockSize);
88         //mxLog("queue maxBlocksize %d sparse %f", maxBlockSize, estNonZero / double(numParam * numParam));
89 }
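// Illustrative sketch (not compiled): the merge case above unions the two
// sorted variable lists with std::inplace_merge + std::unique.  A minimal
// standalone version of that idiom, with a hypothetical helper name:
#if 0
#include <algorithm>
#include <vector>

// Union of two sorted, duplicate-free index lists, as done for merge blocks.
static std::vector<int> unionVars(const std::vector<int> &a, const std::vector<int> &b)
{
        std::vector<int> out = a;
        out.insert(out.end(), b.begin(), b.end());
        std::inplace_merge(out.begin(), out.begin() + a.size(), out.end());
        out.resize(std::unique(out.begin(), out.end()) - out.begin());
        return out;  // e.g. {0,2,5} and {2,3} -> {0,2,3,5}
}
#endif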
90
91 void FitContext::analyzeHessian()
92 {
93         // If we knew the minBlockSize was large then we wouldn't even
94         // try to build merge blocks. 
95         // If maxBlockSize is greater than some threshold then we should
96         // also give up.
97
98         if (blockByVar.size()) return;
99
100         blockByVar.assign(numParam, NULL);
101
102         for (size_t bx=0; bx < allBlocks.size(); ++bx) {
103                 analyzeHessianBlock(allBlocks[bx]);
104         }
105 }
106
107 void FitContext::negateHessian()
108 {
109         // this assumes that we haven't processed the blocks yet, need to check TODO
110         for (size_t bx=0; bx < allBlocks.size(); ++bx) {
111                 allBlocks[bx]->mat *= -1.0;
112         }
113 }
114
115 void FitContext::refreshDenseHess()
116 {
117         if (haveDenseHess) return;
118
119         hess.triangularView<Eigen::Upper>().setZero();
120
121         for (size_t bx=0; bx < allBlocks.size(); ++bx) {
122                 HessianBlock *hb = allBlocks[bx];
123
124                 std::vector<int> &map = hb->vars;
125                 size_t bsize = map.size();
126
127                 for (size_t v1=0; v1 < bsize; ++v1) {
128                         for (size_t v2=0; v2 <= v1; ++v2) {
129                                 hess(map[v2], map[v1]) += hb->mat(v2, v1);
130                         }
131                 }
132         }
133
134         haveDenseHess = true;
135 }
136
137 void FitContext::copyDenseHess(double *dest)
138 {
139         refreshDenseHess();
140         for (size_t v1=0; v1 < numParam; ++v1) {
141                 for (size_t v2=0; v2 <= v1; ++v2) {
142                         double coef = hess.selfadjointView<Eigen::Upper>()(v2,v1);
143                         if (v1==v2) {
144                                 dest[v1 * numParam + v2] = coef;
145                         } else {
146                                 dest[v1 * numParam + v2] = coef;
147                                 dest[v2 * numParam + v1] = coef;
148                         }
149                 }
150         }
151 }
152
153 double *FitContext::getDenseHessUninitialized()
154 {
155         // Assume the caller is going to fill it out
156         haveDenseHess = true;
157         haveDenseIHess = false;
158         return hess.data();
159 }
160
161 static void InvertSymmetricNR(Eigen::MatrixXd &hess, Eigen::MatrixXd &ihess)
162 {
163         ihess = hess;
164         Matrix ihessMat(ihess.data(), ihess.rows(), ihess.cols());
165         if (!InvertSymmetricPosDef(ihessMat, 'U')) return;  // done if the positive definite (Cholesky) inverse succeeds; otherwise fall back to the eigenvalue route below
166
167         int numParams = hess.rows();
168         omxBuffer<double> hessWork(numParams * numParams);
169         memcpy(hessWork.data(), hess.data(), sizeof(double) * numParams * numParams);
170         //pda(hess.data(), numParams, numParams);
171
172         char jobz = 'V';
173         char range = 'A';
174         char uplo = 'U';
175         double abstol = 0;
176         int m;
177         omxBuffer<double> w(numParams);
178         omxBuffer<double> z(numParams * numParams);
179         double optWork;
180         int optIwork;
181         int lwork = -1;
182         int liwork = -1;
183         int info;
184         double realIgn = 0;
185         int intIgn = 0;
186         omxBuffer<int> isuppz(numParams * 2);
187
188         F77_CALL(dsyevr)(&jobz, &range, &uplo, &numParams, hessWork.data(),
189                          &numParams, &realIgn, &realIgn, &intIgn, &intIgn, &abstol, &m, w.data(),
190                          z.data(), &numParams, isuppz.data(), &optWork, &lwork, &optIwork, &liwork, &info);
191
192         lwork = optWork;
193         omxBuffer<double> work(lwork);
194         liwork = optIwork;
195         omxBuffer<int> iwork(liwork);
196         F77_CALL(dsyevr)(&jobz, &range, &uplo, &numParams, hessWork.data(),
197                          &numParams, &realIgn, &realIgn, &intIgn, &intIgn, &abstol, &m, w.data(),
198                          z.data(), &numParams, isuppz.data(), work.data(), &lwork, iwork.data(), &liwork, &info);
199         if (info < 0) {
200                 Rf_error("dsyevr %d", info);
201         } else if (info) {
202                 mxLog("Eigen decomposition failed %d", info);
203                 ihess = Eigen::MatrixXd::Zero(numParams, numParams);
204                 return;
205         }
206
207         std::vector<double> evalDiag(numParams * numParams);
208         for (int px=0; px < numParams; ++px) {
209                 evalDiag[px * numParams + px] = 1/fabs(w[px]);  // take abs(ev) so an indefinite Hessian still yields a positive definite inverse approximation
210         }
211
212         Matrix evMat(z.data(), numParams, numParams);
213         Matrix edMat(evalDiag.data(), numParams, numParams);
214         omxBuffer<double> prod1(numParams * numParams);
215         Matrix p1Mat(prod1.data(), numParams, numParams);
216         SymMatrixMultiply('R', 'U', 1.0, 0, edMat, evMat, p1Mat);
217         char transa = 'N';
218         char transb = 'T';
219         double alpha = 1.0;
220         double beta = 0;
221         F77_CALL(dgemm)(&transa, &transb, &numParams, &numParams, &numParams, &alpha,
222                         prod1.data(), &numParams, z.data(), &numParams, &beta, ihess.data(), &numParams);
223         //pda(ihess.data(), numParams, numParams);
224 }
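// Illustrative sketch (not compiled): when the Cholesky-based inverse fails,
// InvertSymmetricNR above rebuilds the inverse from the eigen decomposition
// hess = Z diag(w) Z^T as Z diag(1/|w|) Z^T.  A dense Eigen equivalent,
// assuming no eigenvalue is exactly zero (hypothetical helper name):
#if 0
#include <Eigen/Dense>

static void invertViaAbsEigenvalues(const Eigen::MatrixXd &hess, Eigen::MatrixXd &ihess)
{
        Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> es(hess);
        // 1/|w|: an indefinite Hessian still yields a positive definite result
        Eigen::VectorXd invAbs = es.eigenvalues().cwiseAbs().cwiseInverse();
        ihess = es.eigenvectors() * invAbs.asDiagonal() * es.eigenvectors().transpose();
}
#endif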
225
226 void FitContext::refreshDenseIHess()
227 {
228         if (haveDenseIHess) return;
229
230         refreshDenseHess();
231         InvertSymmetricNR(hess, ihess);
232
233         ihessFiltered = true;
234         haveDenseIHess = true;
235 }
236
237 void FitContext::refreshSparseHess()
238 {
239         if (haveSparseHess) return;
240
241         sparseHess.resize(numParam, numParam);
242         sparseHess.setZero();
243
244         // need to sort allBlocks for performance TODO
245         for (size_t bx=0; bx < allBlocks.size(); ++bx) {
246                 HessianBlock *hb = allBlocks[bx];
247
248                 std::vector<int> &map = hb->vars;
249                 size_t bsize = map.size();
250
251                 for (size_t v1=0; v1 < bsize; ++v1) {
252                         for (size_t v2=0; v2 <= v1; ++v2) {
253                                 sparseHess.coeffRef(map[v2], map[v1]) += hb->mat(v2, v1);
254                         }
255                 }
256         }
257
258         haveSparseHess = true;
259 }
260
261 static double norm1(const Eigen::SparseMatrix<double> &mat)
262 {
263         // norm_1, maximum column sum
264         double maxCol = 0;
265         for (int k=0; k < mat.outerSize(); ++k) {
266                 double col = 0;
267                 for (Eigen::SparseMatrix<double>::InnerIterator it(mat,k); it; ++it) {
268                         col += fabs(it.value());
269                 }
270                 maxCol = std::max(maxCol, col);
271         }
272         return maxCol;
273 }
274
275 static double frobeniusNorm(const Eigen::SparseMatrix<double> &mat)
276 {
277         double total=0;
278         for (int k=0; k < mat.outerSize(); ++k) {
279                 for (Eigen::SparseMatrix<double>::InnerIterator it(mat,k); it; ++it) {
280                         total += it.value() * it.value();
281                 }
282         }
283         return total / mat.nonZeros();
284 }
285
286 static bool soleymani2013(const Eigen::SparseMatrix<double> &mat, Eigen::SparseMatrix<double> &imat)
287 {
288        imat.setIdentity();
289        imat /= frobeniusNorm(mat);
290
291        Eigen::SparseMatrix<double> I1(mat.rows(), mat.cols());
292        I1.setIdentity();
293        Eigen::SparseMatrix<double> I7(mat.rows(), mat.cols());
294        I7 = I1 * 7;
295        Eigen::SparseMatrix<double> I21(mat.rows(), mat.cols());
296        I21 = I1 * 21;
297        Eigen::SparseMatrix<double> I35(mat.rows(), mat.cols());
298        I35 = I1 * 35;
299
300        double maxCol;
301        int iter = 0;
302        Eigen::SparseMatrix<double> AV = mat.selfadjointView<Eigen::Upper>() * imat;
303        while (++iter < 100) {
304                // try prune() on the multiplications TODO
305                // try selfadjointView TODO
306                Eigen::SparseMatrix<double> rpart =
307                        (AV * (AV * (AV * (AV * (AV * (AV - I7) + I21) -I35) + I35) -I21) + I7);
308
309                Eigen::SparseMatrix<double> prev = imat;
310                imat = prev * rpart;
311                AV = mat.selfadjointView<Eigen::Upper>() * imat;
312
313                Eigen::SparseMatrix<double> diff = I1 - AV;
314                maxCol = norm1(diff);
315                if (maxCol < 1e-6) {
316                        break;
317                }
318        }
319        //mxLog("soleymani2013 invert in %d iter, maxCol %.2g", iter, maxCol);
320        return false;
321 }
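// Note on the loop above: with X = A*V the update is
//   V <- V * (7I - 21X + 35X^2 - 35X^3 + 21X^4 - 7X^5 + X^6),
// so the residual R = I - A*V obeys R_next = R^7, i.e. a seventh-order
// hyperpower (Schulz-type) iteration.  It converges once ||I - A*V|| < 1,
// which the scaled-identity start is meant to achieve (frobeniusNorm() above
// actually returns the mean of the squared nonzero entries, not the usual
// Frobenius norm; it is only used for this initial scaling).
// Illustrative sketch (not compiled) of one dense update step, with a
// hypothetical helper name:
#if 0
#include <Eigen/Dense>

static void hyperpower7Step(const Eigen::MatrixXd &A, Eigen::MatrixXd &V)
{
        const Eigen::MatrixXd I = Eigen::MatrixXd::Identity(A.rows(), A.cols());
        Eigen::MatrixXd X = A * V;
        V = V * (X*(X*(X*(X*(X*(X - 7.0*I) + 21.0*I) - 35.0*I) + 35.0*I) - 21.0*I) + 7.0*I);
}
#endif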
322
323 SEXP sparseInvert_wrapper(SEXP Rmat)
324 {
325         omxManageProtectInsanity mpi;
326
327         SEXP matrixDims;
328         Rf_protect(matrixDims = Rf_getAttrib(Rmat, R_DimSymbol));
329         int *dimList = INTEGER(matrixDims);
330         int rows = dimList[0];
331         int cols = dimList[1];
332         if (rows != cols) Rf_error("Must be square");
333
334         double *matData = REAL(Rmat);
335
336         Eigen::SparseMatrix<double> mat(rows,cols);
337         for (int cx=0; cx < cols; ++cx) {
338                 for (int rx=0; rx < rows; ++rx) {
339                         double val = matData[cx * rows + rx];
340                         if (val == 0) continue;
341                         mat.coeffRef(rx, cx) = val;
342                 }
343         }
344
345         Eigen::SparseMatrix<double> imat(rows,cols);
346         if (soleymani2013(mat, imat)) Rf_error("Invert failed");
347         
348         SEXP ret;
349         Rf_protect(ret = Rf_allocMatrix(REALSXP, rows, cols));
350         double *retData = REAL(ret);
351         for (int cx=0; cx < cols; ++cx) {
352                 for (int rx=0; rx < rows; ++rx) {
353                         retData[cx * rows + rx] = imat.coeff(rx, cx);
354                 }
355         }
356
357         return ret;
358 }
359
360 void FitContext::testMerge()
361 {
362         const int UseId = 2;
363         
364         analyzeHessian();
365
366         //std::cout << "block count " << allBlocks.size() << std::endl;
367
368         sparseHess.resize(numParam, numParam);
369         sparseHess.setZero();
370
371         for (size_t vx=0; vx < numParam; ++vx) {
372                 HessianBlock *hb = blockByVar[vx];
373                 hb->addSubBlocks();
374         }
375
376         for (size_t vx=0; vx < numParam; ++vx) {
377                 HessianBlock *hb = blockByVar[vx];
378                 if (hb->useId == UseId) continue;
379                 hb->useId = UseId;
380
381                 //std::cout << hb->id << " ";
382
383                 size_t size = hb->mmat.rows();
384                 for (size_t col=0; col < size; ++col) {
385                         for (size_t row=0; row <= col; ++row) {
386                                 int vr = hb->vars[row];
387                                 int vc = hb->vars[col];
388                                 sparseHess.coeffRef(vr,vc) = hb->mmat(row,col);
389                         }
390                 }
391         }
392         //std::cout << "\n";
393
394         refreshDenseHess();
395         Eigen::MatrixXd dense = sparseHess;
396         Eigen::MatrixXd diff = (dense - hess).selfadjointView<Eigen::Upper>();
397         //std::cout << diff << std::endl;
398         //std::cout << "fancy\n" << dense << std::endl;
399         //std::cout << "correct\n" << hess << std::endl;
400         double bad = diff.cwiseAbs().maxCoeff();
401         if (bad > .0001) Rf_error("Hess: dense sparse mismatch %f", bad);
402 }
403
404 bool FitContext::refreshSparseIHess()
405 {
406         if (haveSparseIHess) return true;
407
408         const int AcceptableDenseInvertSize = 100;
409         const bool checkResult = false;
410
411         //testMerge();
412
413         sparseIHess.resize(numParam, numParam);
414         sparseIHess.setZero();
415
416         // sparseness by size simulation
417         //mxLog("minBlockSize %d maxBlocksize %d sparse %f", minBlockSize, maxBlockSize, estNonZero / double(numParam * numParam));
418
419         if (minBlockSize < AcceptableDenseInvertSize) {
420                 analyzeHessian();
421         }
422         if (maxBlockSize < std::min(int(numParam), AcceptableDenseInvertSize)) {
423                 const int UseId = 1;
424                 for (size_t vx=0; vx < numParam; ++vx) {
425                         HessianBlock *hb = blockByVar[vx];
426                         if (hb->useId == UseId) continue;
427                         hb->useId = UseId;
428
429                         hb->addSubBlocks();
430                         size_t size = hb->mmat.rows();
431
432                         InvertSymmetricNR(hb->mmat, hb->imat);
433                 
434                         for (size_t col=0; col < size; ++col) {
435                                 for (size_t row=0; row <= col; ++row) {
436                                         int vr = hb->vars[row];
437                                         int vc = hb->vars[col];
438                                         sparseIHess.coeffRef(vr,vc) = hb->imat(row,col);
439                                 }
440                         }
441                 }
442         } else {
443                 return false;
444
445                 // Needs more work TODO
446                 // if (estNonZero / double(numParam * numParam) < .2)
447                 refreshSparseHess();
448                 if (soleymani2013(sparseHess, sparseIHess)) {
449                         sparseIHess.setZero();  // NR will try steepest descent
450                 }
451         }
452
453         if (checkResult) {
454                 int nonZero = 0;
455                 refreshDenseHess();
456                 for (size_t cx=0; cx < numParam; ++cx) {
457                         for (size_t rx=0; rx <= cx; ++rx) {
458                                 if (hess(rx,cx) != 0) {
459                                         if (rx == cx) nonZero += 1;
460                                         else nonZero += 2;
461                                 }
462                         }
463                 }
464                 Eigen::MatrixXd denseI = sparseIHess;
465                 denseI.triangularView<Eigen::Lower>() = denseI.transpose().triangularView<Eigen::Lower>();
466                 Eigen::MatrixXd resid = denseI * hess.selfadjointView<Eigen::Upper>();
467                 for (int dx=0; dx < resid.rows(); ++dx) {
468                         resid.coeffRef(dx,dx) -= 1;
469                 }
470                 double bad = resid.cwiseAbs().maxCoeff();
471                 if (bad > .01) {
472                         Rf_error("IHess: dense sparse mismatch %f", bad);
473                 }
474
475                 if (0) {
476                 mxLog("done %f maxBlocksize %d est sparse %f actual sparse %f", bad, maxBlockSize,
477                       estNonZero / double(numParam * numParam),
478                       nonZero / double(numParam * numParam));
479                 }
480         }
481
482         ihessFiltered = true;
483         haveSparseIHess = true;
484         return true;
485 }
486
487 Eigen::VectorXd FitContext::ihessGradProd()
488 {
489         if (refreshSparseIHess()) {
490                 return sparseIHess.selfadjointView<Eigen::Upper>() * grad;
491         } else {
492                 refreshDenseIHess();
493                 return ihess.selfadjointView<Eigen::Upper>() * grad;
494         }
495 }
496
497 Eigen::VectorXd FitContext::ihessDiag()
498 {
499         refreshDenseIHess();
500         return ihess.diagonal();
501 }
502
503 double *FitContext::getDenseIHessUninitialized()
504 {
505         // Assume the caller is going to fill it out
506         haveDenseIHess = true;
507         haveDenseHess = false;
508         return ihess.data();
509 }
510
511 // NOTE: This may be a filtered inverse from InvertSymmetricNR
512 void FitContext::copyDenseIHess(double *dest)
513 {
514         refreshDenseIHess();
515
516         for (size_t v1=0; v1 < numParam; ++v1) {
517                 for (size_t v2=0; v2 <= v1; ++v2) {
518                         double coef = ihess.selfadjointView<Eigen::Upper>()(v2,v1);
519                         if (v1==v2) {
520                                 dest[v1 * numParam + v2] = coef;
521                         } else {
522                                 dest[v1 * numParam + v2] = coef;
523                                 dest[v2 * numParam + v1] = coef;
524                         }
525                 }
526         }
527 }
528
529 double *FitContext::getDenseHessianish()
530 {
531         if (haveDenseHess) return hess.data();
532         if (haveDenseIHess) {
533                 if (ihessFiltered) Rf_error("Attempted FitContext::getDenseHessianish with filtered ihess");
534                 return ihess.data();
535         }
536         // try harder TODO
537         return NULL;
538 }
539
540 HessianBlock *HessianBlock::clone()
541 {
542         HessianBlock *hb = new HessianBlock;
543         hb->vars = vars;
544         hb->mat.resize(vars.size(), vars.size());
545         return hb;
546 }
547
548 void HessianBlock::addSubBlocks()
549 {
550         if (mmat.rows()) return;
551
552         mmat = mat;
553
554         std::vector<int> vmap; // we could localize the vars map instead of computing the mapping each time TODO
555
556         for (size_t bx=0; bx < subBlocks.size(); ++bx) {
557                 HessianBlock *sb = subBlocks[bx];
558                 sb->addSubBlocks();
559         }
560
561         //std::cout << "initial " << id << "\n" << mmat << std::endl;
562
563         for (size_t bx=0; bx < subBlocks.size(); ++bx) {
564                 HessianBlock *sb = subBlocks[bx];
565                 
566                 //std::cout << "subblock " << sb->id << "\n" << sb->mmat << std::endl;
567
568                 size_t numVars = sb->vars.size();
569                 vmap.resize(numVars);
570                 for (size_t vx=0; vx < numVars; ++vx) {
571                         // std::set has special method for this TODO
572                         int px = std::lower_bound(vars.begin(), vars.end(), sb->vars[vx]) - vars.begin();
573                         vmap[vx] = px;
574                 }
575
576                 for (size_t v1=0; v1 < numVars; ++v1) {
577                         for (size_t v2=0; v2 <= v1; ++v2) {
578                                 mmat(vmap[v2], vmap[v1]) += sb->mmat(v2, v1);
579                         }
580                 }
581         }
582         
583         //std::cout << "result " << id << "\n" << mmat << std::endl;
584 }
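// Illustrative sketch (not compiled): because both var lists are kept sorted,
// addSubBlocks() can locate each sub-block variable inside the parent block
// with std::lower_bound.  A standalone version of that mapping, with a
// hypothetical helper name (assumes every sub var occurs in the parent):
#if 0
#include <algorithm>
#include <vector>

static std::vector<int> mapIntoParent(const std::vector<int> &parentVars,
                                      const std::vector<int> &subVars)
{
        std::vector<int> vmap(subVars.size());
        for (size_t vx=0; vx < subVars.size(); ++vx) {
                vmap[vx] = std::lower_bound(parentVars.begin(), parentVars.end(),
                                            subVars[vx]) - parentVars.begin();
        }
        return vmap;  // e.g. parent {0,2,3,5}, sub {2,5} -> {1,3}
}
#endif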
585
586 int HessianBlock::estNonZero() const
587 {
588         if (!merge) {
589                 return int(vars.size() * vars.size());
590         } else {
591                 int total = 0;
592                 for (size_t bx=0; bx < subBlocks.size(); ++bx) {
593                         HessianBlock *sb = subBlocks[bx];
594                         total += sb->estNonZero();
595                 }
596                 return std::min(total, int(vars.size() * vars.size()));
597         }
598 }
599
600 void FitContext::init()
601 {
602         numParam = varGroup->vars.size();
603         wanted = 0;
604         mac = parent? parent->mac : 0;
605         fit = parent? parent->fit : NA_REAL;
606         est = new double[numParam];
607         infoDefinite = NA_LOGICAL;
608         infoCondNum = NA_REAL;
609         infoA = NULL;
610         infoB = NULL;
611         stderrs = NULL;
612         inform = INFORM_UNINITIALIZED;
613         iterations = 0;
614
615         hess.resize(numParam, numParam);
616         ihess.resize(numParam, numParam);
617         clearHessian();
618 }
619
620 void FitContext::clearHessian()
621 {
622         for (size_t bx=0; bx < mergeBlocks.size(); ++bx) {
623                 delete mergeBlocks[bx];
624         }
625         for (size_t bx=0; bx < allBlocks.size(); ++bx) {
626                 delete allBlocks[bx];
627         }
628
629         allBlocks.clear();
630         mergeBlocks.clear();
631         blockByVar.clear();
632         haveSparseHess = false;
633         haveSparseIHess = false;
634         haveDenseHess = false;
635         haveDenseIHess = false;
636         estNonZero = 0;
637         minBlockSize = 0;
638         maxBlockSize = 0;
639         ihessFiltered = false;
640 }
641
642 void FitContext::allocStderrs()
643 {
644         if (stderrs) return;
645
646         size_t numParam = varGroup->vars.size();
647         stderrs = new double[numParam];
648
649         for (size_t px=0; px < numParam; ++px) {
650                 stderrs[px] = NA_REAL;
651         }
652 }
653
654 FitContext::FitContext(std::vector<double> &startingValues)
655 {
656         parent = NULL;
657         varGroup = Global->freeGroup[FREEVARGROUP_ALL];
658         init();
659
660         size_t numParam = varGroup->vars.size();
661         if (startingValues.size() != numParam) {
662                 Rf_error("Got %d starting values for %d parameters",
663                       (int) startingValues.size(), (int) numParam);
664         }
665         memcpy(est, startingValues.data(), sizeof(double) * numParam);
666 }
667
668 FitContext::FitContext(FitContext *parent, FreeVarGroup *varGroup)
669 {
670         this->parent = parent;
671         this->varGroup = varGroup;
672         init();
673
674         FreeVarGroup *src = parent->varGroup;
675         FreeVarGroup *dest = varGroup;
676         size_t dvars = varGroup->vars.size();
677         if (dvars == 0) return;
678         mapToParent.resize(dvars);
679
680         size_t d1 = 0;
681         for (size_t s1=0; s1 < src->vars.size(); ++s1) {
682                 if (src->vars[s1] != dest->vars[d1]) continue;
683                 mapToParent[d1] = s1;
684                 est[d1] = parent->est[s1];
685                 if (++d1 == dvars) break;
686         }
687         if (d1 != dvars) Rf_error("Parent free parameter group (id=%d) is not a superset of group %d",
688                                src->id[0], dest->id[0]);
689
690         wanted = parent->wanted;
691         infoDefinite = parent->infoDefinite;
692         infoCondNum = parent->infoCondNum;
693         IterationError = parent->IterationError;
694         iterations = parent->iterations;
695 }
696
697 void FitContext::copyParamToModel(omxMatrix *mat)
698 { copyParamToModel(mat->currentState); }
699
700 void FitContext::copyParamToModel(omxMatrix *mat, double *at)
701 { copyParamToModel(mat->currentState, at); }
702
703 void FitContext::updateParent()
704 {
705         FreeVarGroup *src = varGroup;
706         FreeVarGroup *dest = parent->varGroup;
707         size_t svars = varGroup->vars.size();
708
709         parent->wanted |= wanted;
710         parent->fit = fit;
711         parent->IterationError = IterationError;
712         parent->mac = mac;
713         parent->infoDefinite = infoDefinite;
714         parent->infoCondNum = infoCondNum;
715         parent->iterations = iterations;
716
717         // rewrite using mapToParent TODO
718
719         if (svars > 0) {
720                 size_t s1 = 0;
721                 for (size_t d1=0; d1 < dest->vars.size(); ++d1) {
722                         if (dest->vars[d1] != src->vars[s1]) continue;
723                         parent->est[d1] = est[s1];
724                         if (++s1 == svars) break;
725                 }
726                 if (stderrs) {
727                         parent->allocStderrs();
728                         for (size_t s1=0; s1 < src->vars.size(); ++s1) {
729                                 parent->stderrs[mapToParent[s1]] = stderrs[s1];
730                         }
731                 }
732         }
733         
734         // pda(est, 1, svars);
735         // pda(parent->est, 1, dvars);
736 }
737
738 void FitContext::updateParentAndFree()
739 {
740         updateParent();
741         delete this;
742 }
743
744 void FitContext::log(int what)
745 {
746         size_t count = varGroup->vars.size();
747         std::string buf;
748         if (what & FF_COMPUTE_MAXABSCHANGE) buf += string_snprintf("MAC: %.5f\n", mac);
749         if (what & FF_COMPUTE_FIT) buf += string_snprintf("fit: %.5f (scale %f)\n", fit, Global->llScale);
750         if (what & FF_COMPUTE_ESTIMATE) {
751                 buf += string_snprintf("est %d: c(", (int) count);
752                 for (size_t vx=0; vx < count; ++vx) {
753                         buf += string_snprintf("%.5f", est[vx]);
754                         if (vx < count - 1) buf += ", ";
755                 }
756                 buf += ")\n";
757         }
758         mxLogBig(buf);
759 }
760
761 void FitContext::recordIterationError(const char* msg, ...)
762 {
763         // could record the parameter vector here for a better error message TODO
764         va_list ap;
765         va_start(ap, msg);
766         std::string str = string_vsnprintf(msg, ap);
767         va_end(ap);
768
769 #pragma omp critical(IterationError)
770         IterationError = str;
771 }
772
773 std::string FitContext::getIterationError()
774 {
775         return IterationError;
776 }
777
778 static void _fixSymmetry(const char *name, double *mat, size_t numParam, bool force)
779 {
780         for (size_t h1=1; h1 < numParam; h1++) {
781                 for (size_t h2=0; h2 < h1; h2++) {
782                         if (!force && mat[h2 * numParam + h1] != 0) {
783                                 omxRaiseErrorf("%s is not upper triangular", name);
784                                 break;
785                         }
786                         mat[h2 * numParam + h1] = mat[h1 * numParam + h2];
787                 }
788         }
789 }
790
791 static void omxRepopulateRFitFunction(omxFitFunction* oo, double* x, int n)
792 {
793         omxRFitFunction* rFitFunction = (omxRFitFunction*)oo->argStruct;
794
795         SEXP theCall, estimate;
796
797         Rf_protect(estimate = Rf_allocVector(REALSXP, n));
798         double *est = REAL(estimate);
799         for(int i = 0; i < n ; i++) {
800                 est[i] = x[i];
801         }
802
803         Rf_protect(theCall = Rf_allocVector(LANGSXP, 4));
804
805         SETCAR(theCall, Rf_install("imxUpdateModelValues"));
806         SETCADR(theCall, rFitFunction->model);
807         SETCADDR(theCall, rFitFunction->flatModel);
808         SETCADDDR(theCall, estimate);
809
810         R_Reprotect(rFitFunction->model = Rf_eval(theCall, R_GlobalEnv), rFitFunction->modelIndex);
811
812         Rf_unprotect(2); // theCall, estimate
813 }
814
815 void FitContext::copyParamToModel(omxState* os)
816 {
817         copyParamToModel(os, est);
818 }
819
820 void FitContext::copyParamToModel(omxState* os, double *at)
821 {
822         size_t numParam = varGroup->vars.size();
823
824         if(numParam == 0) return;
825
826         // Confidence Intervals & Hessian Calculation probe the parameter space
827         // near the best estimate. If stale, we need to restore the best estimate
828         // before returning results to the user.
829         os->stale = at != est;
830
831         os->computeCount++;
832
833         if(OMX_VERBOSE) {
834                 std::string buf;
835                 buf += string_snprintf("Call: %d (%d) ", iterations, (int) os->computeCount);
836                 buf += ("Estimates: [");
837                 for(size_t k = 0; k < numParam; k++) {
838                         buf += string_snprintf(" %f", at[k]);
839                 }
840                 buf += ("]\n");
841                 mxLogBig(buf);
842         }
843
844         for(size_t k = 0; k < numParam; k++) {
845                 omxFreeVar* freeVar = varGroup->vars[k];
846                 for(size_t l = 0; l < freeVar->locations.size(); l++) {
847                         omxFreeVarLocation *loc = &freeVar->locations[l];
848                         omxMatrix *matrix = os->matrixList[loc->matrix];
849                         int row = loc->row;
850                         int col = loc->col;
851                         omxSetMatrixElement(matrix, row, col, at[k]);
852                         if(OMX_DEBUG) {
853                                 mxLog("Setting location (%d, %d) of matrix %d to value %f for var %d",
854                                         row, col, loc->matrix, at[k], (int) k);
855                         }
856                 }
857         }
858
859         if (RFitFunction) omxRepopulateRFitFunction(RFitFunction, at, numParam);
860
861         varGroup->markDirty(os);
862
863         if (os->childList.size() == 0) return;
864
865         for(size_t i = 0; i < os->childList.size(); i++) {
866                 copyParamToModel(os->childList[i], at);
867         }
868 }
869
870 double *FitContext::take(int want)
871 {
872         if (!(want & (wanted | FF_COMPUTE_ESTIMATE))) {
873                 Rf_error("Attempt to take %d but not available", want);
874         }
875
876         double *ret = NULL;
877         switch(want) {
878         case FF_COMPUTE_ESTIMATE:
879                 ret = est;
880                 est = NULL;
881                 break;
882         default:
883                 Rf_error("Taking of %d is not implemented", want);
884         }
885         if (!ret) Rf_error("Attempt to take %d, already taken", want);
886         return ret;
887 }
888
889 // Rethink this whole design TODO
890 // If we compute things blockwise then most of this can go away?
891 void FitContext::preInfo()
892 {
893         size_t npsq = numParam * numParam;
894
895         if (!infoA) infoA = new double[npsq];
896         if (!infoB) infoB = new double[npsq];
897
898         switch (infoMethod) {
899         case INFO_METHOD_SANDWICH:
900         case INFO_METHOD_MEAT:
901                 OMXZERO(infoB, npsq);  // fall through: these methods also need infoA zeroed
902         case INFO_METHOD_BREAD:
903                 OMXZERO(infoA, npsq);
904                 break;
905         case INFO_METHOD_HESSIAN:
906                 clearHessian();
907                 break;
908         default:
909                 Rf_error("Unknown information matrix estimation method %d", infoMethod);
910         }
911 }
912
913 void FitContext::postInfo()
914 {
915         size_t numParam = varGroup->vars.size();
916         switch (infoMethod) {
917         case INFO_METHOD_SANDWICH:{
918                 // move into FCDeriv TODO
919                 omxBuffer<double> work(numParam * numParam);
920                 Matrix amat(infoA, numParam, numParam);
921                 InvertSymmetricIndef(amat, 'U');
922                 _fixSymmetry("InfoB", infoB, numParam, false);
923                 Matrix bmat(infoB, numParam, numParam);
924                 Matrix wmat(work.data(), numParam, numParam);
925                 Matrix hmat(getDenseIHessUninitialized(), numParam, numParam);
926                 SymMatrixMultiply('L', 'U', 1, 0, amat, bmat, wmat);
927                 SymMatrixMultiply('R', 'U', 1, 0, amat, wmat, hmat);
928                 wanted |= FF_COMPUTE_IHESSIAN;
929                 break;}
930         case INFO_METHOD_MEAT:{
931                 memcpy(getDenseHessUninitialized(), infoB, sizeof(double) * numParam * numParam); // avoid copy TODO
932                 wanted |= FF_COMPUTE_HESSIAN;
933                 break;}
934         case INFO_METHOD_BREAD:{
935                 memcpy(getDenseHessUninitialized(), infoA, sizeof(double) * numParam * numParam); // avoid copy TODO
936                 wanted |= FF_COMPUTE_HESSIAN;
937                 break;}
938         case INFO_METHOD_HESSIAN:
939                 if (Global->llScale > 0) negateHessian();
940                 wanted |= FF_COMPUTE_HESSIAN;
941                 break;
942         default:
943                 Rf_error("Unknown information matrix estimation method %d", infoMethod);
944         }
945 }
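// Illustrative sketch (not compiled): for INFO_METHOD_SANDWICH the code above
// assembles the sandwich estimator infoA^-1 * infoB * infoA^-1 via the LAPACK
// helpers and stores it as the inverse Hessian.  A dense Eigen version of the
// same arithmetic, assuming infoA is invertible (hypothetical helper name):
#if 0
#include <Eigen/Dense>

static Eigen::MatrixXd sandwich(const Eigen::MatrixXd &infoA, const Eigen::MatrixXd &infoB)
{
        Eigen::MatrixXd invA = infoA.inverse();  // assumes infoA is invertible
        return invA * infoB * invA;
}
#endif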
946
947 FitContext::~FitContext()
948 {
949         clearHessian();
950         if (est) delete [] est;
951         if (stderrs) delete [] stderrs;
952         if (infoA) delete [] infoA;
953         if (infoB) delete [] infoB;
954 }
955
956 omxFitFunction *FitContext::RFitFunction = NULL;
957
958 void FitContext::setRFitFunction(omxFitFunction *rff)
959 {
960         if (rff) {
961                 Global->numThreads = 1;
962                 if (RFitFunction) {
963                         Rf_error("You can only create 1 MxRFitFunction per independent model");
964                 }
965         }
966         RFitFunction = rff;
967 }
968
969 Ramsay1975::Ramsay1975(FitContext *fc, const char *flavor, int verbose, double minCaution) :
970         fc(fc), flavor(flavor), verbose(verbose), minCaution(minCaution)
971 {
972         if (!flavor) Rf_error("Ramsay: flavor cannot be NULL");
973
974         maxCaution = 0.0;
975         boundsHit = 0;
976         caution = 0;
977         highWatermark = std::max(0.5, caution);  // arbitrary guess
978
979         numParam = fc->varGroup->vars.size();
980         for (size_t px=0; px < numParam; ++px) {
981                 if (strcmp(fc->flavor[px], flavor) != 0) continue;
982                 vars.push_back(px);
983         }
984
985         prevAdj1.assign(numParam, 0);
986         prevAdj2.resize(numParam);
987         prevEst.resize(numParam);
988         memcpy(prevEst.data(), fc->est, sizeof(double) * numParam);
989
990         if (verbose >= 2) {
991                 mxLog("Ramsay[%10s]: %d parameters, caution %f, min caution %f",
992                       flavor, (int)vars.size(), caution, minCaution);
993         }
994 }
995
996 void Ramsay1975::recordEstimate(int px, double newEst)
997 {
998         omxFreeVar *fv = fc->varGroup->vars[px];
999         bool hitBound=false;
1000         double param = newEst;
1001         if (param < fv->lbound) {
1002                 hitBound=true;
1003                 param = prevEst[px] - (prevEst[px] - fv->lbound) / 2;
1004         }
1005         if (param > fv->ubound) {
1006                 hitBound=true;
1007                 param = prevEst[px] + (fv->ubound - prevEst[px]) / 2;
1008         }
1009         boundsHit += hitBound;
1010         
1011         prevAdj2[px] = prevAdj1[px];
1012         prevAdj1[px] = param - prevEst[px];
1013         
1014         if (verbose >= 4) {
1015                 std::string buf;
1016                 buf += string_snprintf("Ramsay[%10s]: %d~%s %.4f -> %.4f", flavor, px, fv->name, prevEst[px], param);
1017                 if (hitBound) {
1018                         buf += string_snprintf(" wanted %.4f but hit bound", newEst);
1019                 }
1020                 if (prevAdj1[px] * prevAdj2[px] < 0) {
1021                         buf += " *OSC*";
1022                 }
1023                 buf += "\n";
1024                 mxLogBig(buf);
1025         }
1026
1027         fc->est[px] = param;
1028         prevEst[px] = param;
1029 }
1030
1031 void Ramsay1975::apply()
1032 {
1033         for (size_t px=0; px < vars.size(); ++px) {
1034                 int vx = vars[px];
1035                 recordEstimate(vx, (1 - caution) * fc->est[vx] + caution * prevEst[vx]);
1036         }
1037 }
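// Note on apply(): with caution c each flavor's parameters are damped toward
// the previous estimate, accepted = (1 - c)*proposed + c*previous.  For
// example, c = 0.25, previous = 1.0, proposed = 2.0 gives
// 0.75*2.0 + 0.25*1.0 = 1.75.  Illustrative sketch (not compiled),
// hypothetical helper name:
#if 0
static double dampedUpdate(double proposed, double previous, double caution)
{
        // caution 0 accepts the proposal outright; caution near 1 barely moves
        return (1.0 - caution) * proposed + caution * previous;
}
#endif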
1038
1039 void Ramsay1975::recalibrate(bool *restart)
1040 {
1041         if (vars.size() == 0) return;
1042
1043         double normPrevAdj2 = 0;
1044         double normAdjDiff = 0;
1045         std::vector<double> adjDiff(numParam);
1046
1047         // The choice of norm is also arbitrary. Other norms might work better.
1048         for (size_t vx=0; vx < vars.size(); ++vx) {
1049                 int px = vars[vx];
1050                 adjDiff[px] = prevAdj1[px] - prevAdj2[px];
1051                 normPrevAdj2 += prevAdj2[px] * prevAdj2[px];
1052         }
1053
1054         for (size_t vx=0; vx < vars.size(); ++vx) {
1055                 int px = vars[vx];
1056                 normAdjDiff += adjDiff[px] * adjDiff[px];
1057         }
1058         if (normAdjDiff == 0) {
1059                 return;
1060                 //Rf_error("Ramsay: no free variables of flavor %d", flavor);
1061         }
1062
1063         double ratio = sqrt(normPrevAdj2 / normAdjDiff);
1064         //if (verbose >= 3) mxLog("Ramsay[%d]: sqrt(%.5f/%.5f) = %.5f",
1065         // flavor, normPrevAdj2, normAdjDiff, ratio);
1066
1067         double newCaution = 1 - (1-caution) * ratio / (1+boundsHit);
1068         if (newCaution > .95) newCaution = .95;  // arbitrary guess
1069         if (newCaution < 0) newCaution /= 2;     // don't get overconfident
1070         if (newCaution < minCaution) newCaution = minCaution;
1071         if (newCaution < caution) {
1072                 caution = newCaution/3 + 2*caution/3;  // don't speed up too fast, arbitrary ratio
1073         } else {
1074                 caution = newCaution;
1075         }
1076         maxCaution = std::max(maxCaution, caution);
1077         goingWild = false;
1078         if (caution < highWatermark || (normPrevAdj2 < 1e-3 && normAdjDiff < 1e-3)) {
1079                 if (verbose >= 3) mxLog("Ramsay[%10s]: %.2f caution", flavor, caution);
1080         } else {
1081                 if (verbose >= 3) {
1082                         mxLog("Ramsay[%10s]: caution %.2f > %.2f, extreme oscillation, restart recommended",
1083                               flavor, caution, highWatermark);
1084                 }
1085                 *restart = TRUE;
1086                 goingWild = true;
1087         }
1088         highWatermark += .02; // arbitrary guess
1089         boundsHit = 0;
1090 }
1091
1092 void Ramsay1975::restart(bool myFault)
1093 {
1094         memcpy(prevEst.data(), fc->est, sizeof(double) * numParam);
1095         prevAdj1.assign(numParam, 0);
1096         prevAdj2.assign(numParam, 0);
1097         myFault |= goingWild;
1098         if (myFault) {
1099                 highWatermark = 1 - (1 - highWatermark) * .5; // arbitrary guess
1100                 caution = std::max(caution, highWatermark);   // arbitrary guess
1101                 maxCaution = std::max(maxCaution, caution);
1102                 highWatermark = caution;
1103         }
1104         if (vars.size() && verbose >= 3) {
1105                 mxLog("Ramsay[%10s]: restart%s with %.2f caution %.2f highWatermark",
1106                       flavor, myFault? " (my fault)":"", caution, highWatermark);
1107         }
1108 }
1109
1110 omxCompute::omxCompute()
1111 {
1112         varGroup = NULL;
1113 }
1114
1115 void omxCompute::collectResultsHelper(FitContext *fc, std::vector< omxCompute* > &clist,
1116                                       LocalComputeResult *lcr, MxRList *out)
1117 {
1118         for (std::vector< omxCompute* >::iterator it = clist.begin(); it != clist.end(); ++it) {
1119                 omxCompute *c1 = *it;
1120                 c1->collectResults(fc, lcr, out);
1121         }
1122 }
1123
1124 void omxCompute::collectResults(FitContext *fc, LocalComputeResult *lcr, MxRList *out)
1125 {
1126         MxRList *slots = new MxRList();
1127         reportResults(fc, slots, out);
1128         if (slots->size()) {
1129                 lcr->push_back(std::make_pair(computeId, slots));
1130         } else {
1131                 delete slots;
1132         }
1133 }
1134
1135 omxCompute::~omxCompute()
1136 {}
1137
1138 void omxCompute::initFromFrontend(SEXP rObj)
1139 {
1140         SEXP slotValue;
1141         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("id")));
1142         if (Rf_length(slotValue) != 1) Rf_error("MxCompute has no ID");
1143
1144         computeId = INTEGER(slotValue)[0];
1145         varGroup = Global->findVarGroup(computeId);
1146
1147         if (!varGroup) {
1148                 Rf_protect(slotValue = R_do_slot(rObj, Rf_install("freeSet")));
1149                 if (Rf_length(slotValue) == 0) {
1150                         varGroup = Global->findVarGroup(FREEVARGROUP_NONE);
1151                 } else if (strcmp(CHAR(STRING_ELT(slotValue, 0)), ".")==0) {
1152                         varGroup = Global->freeGroup[FREEVARGROUP_ALL];
1153                 } else {
1154                         Rf_warning("MxCompute ID %d references matrix '%s' in its freeSet "
1155                                 "but this matrix contains no free parameters",
1156                                 computeId, CHAR(STRING_ELT(slotValue, 0)));
1157                         varGroup = Global->findVarGroup(FREEVARGROUP_NONE);
1158                 }
1159         }
1160         if (OMX_DEBUG) {
1161                 mxLog("MxCompute id %d assigned to var group %d", computeId, varGroup->id[0]);
1162         }
1163 }
1164
1165 void omxCompute::compute(FitContext *fc)
1166 {
1167         ComputeInform origInform = fc->inform;
1168         FitContext *narrow = fc;
1169         if (fc->varGroup != varGroup) narrow = new FitContext(fc, varGroup);
1170         narrow->inform = INFORM_UNINITIALIZED;
1171         computeImpl(narrow);
1172         fc->inform = std::max(origInform, narrow->inform);
1173         if (fc->varGroup != varGroup) narrow->updateParentAndFree();
1174         Global->checkpointMessage(fc, fc->est, "%s", name);
1175 }
1176
1177 class ComputeContainer : public omxCompute {
1178         typedef omxCompute super;
1179 protected:
1180         std::vector< omxCompute* > clist;
1181 public:
1182         virtual void collectResults(FitContext *fc, LocalComputeResult *lcr, MxRList *out);
1183 };
1184
1185 void ComputeContainer::collectResults(FitContext *fc, LocalComputeResult *lcr, MxRList *out)
1186 {
1187         super::collectResults(fc, lcr, out);
1188         collectResultsHelper(fc, clist, lcr, out);
1189 }
1190
1191 class omxComputeSequence : public ComputeContainer {
1192         typedef ComputeContainer super;
1193
1194  public:
1195         virtual void initFromFrontend(SEXP rObj);
1196         virtual void computeImpl(FitContext *fc);
1197         virtual ~omxComputeSequence();
1198 };
1199
1200 class omxComputeIterate : public ComputeContainer {
1201         typedef ComputeContainer super;
1202         int maxIter;
1203         double tolerance;
1204         int verbose;
1205
1206  public:
1207         virtual void initFromFrontend(SEXP rObj);
1208         virtual void computeImpl(FitContext *fc);
1209         virtual ~omxComputeIterate();
1210 };
1211
1212 class omxComputeOnce : public omxCompute {
1213         typedef omxCompute super;
1214         std::vector< omxMatrix* > algebras;
1215         std::vector< omxExpectation* > expectations;
1216         std::vector< const char* > predict;
1217         const char *how;
1218         int verbose;
1219         bool mac;
1220         bool starting;
1221         bool fit;
1222         bool gradient;
1223         bool hessian;
1224         bool ihessian;
1225         bool infoMat;
1226         enum ComputeInfoMethod infoMethod;
1227         bool hgprod;
1228         bool isBestFit; // for backward compatibility
1229
1230  public:
1231         virtual void initFromFrontend(SEXP rObj);
1232         virtual void computeImpl(FitContext *fc);
1233         virtual void reportResults(FitContext *fc, MxRList *slots, MxRList *out);
1234 };
1235
1236 class ComputeEM : public omxCompute {
1237         typedef omxCompute super;
1238         std::vector< omxExpectation* > expectations;
1239         const char *predict;
1240         omxCompute *fit1;  // maybe rename to mstep TODO
1241         omxMatrix *fit3;   // rename to observedFit
1242         int EMcycles;
1243         int maxIter;
1244         int mstepIter;
1245         int totalMstepIter;
1246         double tolerance;
1247         double semTolerance;
1248         int verbose;
1249         bool useRamsay;
1250         bool information;
1251         std::vector< omxMatrix * > semFitFunction;
1252         enum ComputeInfoMethod infoMethod;
1253         enum SEMMethod { ClassicSEM, TianSEM, GridSEM, AgileSEM } semMethod;
1254         double *semMethodData;
1255         int semMethodLen;
1256         bool semDebug;
1257         bool semFixSymmetry;
1258         bool semForcePD;
1259         int agileMaxIter;
1260         SEXP rateMatrix; //debug
1261         SEXP inputInfoMatrix; //debug
1262         SEXP outputInfoMatrix; //debug
1263         SEXP origEigenvalues; //debug
1264         std::vector<Ramsay1975*> ramsay;
1265         double noiseTarget;
1266         double noiseTolerance;
1267         std::vector<double*> estHistory;
1268         std::vector<double> probeOffset;
1269         std::vector<double> diffWork;
1270         std::vector<int> paramHistLen;
1271         std::vector<double> optimum;
1272         double bestFit;
1273         static const double MIDDLE_START;
1274         static const double MIDDLE_END;
1275         size_t maxHistLen;
1276         int semProbeCount;
1277
1278         void setExpectationPrediction(const char *context);
1279         void probeEM(FitContext *fc, int vx, double offset, std::vector<double> *rijWork);
1280         void recordDiff(FitContext *fc, int v1, std::vector<double> &rijWork,
1281                         double *stdDiff, bool *mengOK);
1282
1283  public:
1284         virtual void initFromFrontend(SEXP rObj);
1285         virtual void computeImpl(FitContext *fc);
1286         virtual void collectResults(FitContext *fc, LocalComputeResult *lcr, MxRList *out);
1287         virtual void reportResults(FitContext *fc, MxRList *slots, MxRList *out);
1288         virtual ~ComputeEM();
1289 };
1290
1291 const double ComputeEM::MIDDLE_START = 0.105360515657826281366; // -log(.9) constexpr
1292 const double ComputeEM::MIDDLE_END = 0.001000500333583534363566; // -log(.999) constexpr
1293
1294 class ComputeStandardError : public omxCompute {
1295         typedef omxCompute super;
1296  public:
1297         virtual void reportResults(FitContext *fc, MxRList *slots, MxRList *out);
1298 };
1299
1300 class ComputeHessianQuality : public omxCompute {
1301         typedef omxCompute super;
1302  public:
1303         virtual void reportResults(FitContext *fc, MxRList *slots, MxRList *out);
1304 };
1305
1306 class ComputeReportDeriv : public omxCompute {
1307         typedef omxCompute super;
1308  public:
1309         virtual void reportResults(FitContext *fc, MxRList *slots, MxRList *out);
1310 };
1311
1312 static class omxCompute *newComputeSequence()
1313 { return new omxComputeSequence(); }
1314
1315 static class omxCompute *newComputeIterate()
1316 { return new omxComputeIterate(); }
1317
1318 static class omxCompute *newComputeOnce()
1319 { return new omxComputeOnce(); }
1320
1321 static class omxCompute *newComputeEM()
1322 { return new ComputeEM(); }
1323
1324 static class omxCompute *newComputeStandardError()
1325 { return new ComputeStandardError(); }
1326
1327 static class omxCompute *newComputeHessianQuality()
1328 { return new ComputeHessianQuality(); }
1329
1330 static class omxCompute *newComputeReportDeriv()
1331 { return new ComputeReportDeriv(); }
1332
1333 struct omxComputeTableEntry {
1334         char name[32];
1335         omxCompute *(*ctor)();
1336 };
1337
1338 static const struct omxComputeTableEntry omxComputeTable[] = {
1339         {"MxComputeNumericDeriv", &newComputeNumericDeriv},
1340         {"MxComputeGradientDescent", &newComputeGradientDescent},
1341         {"MxComputeSequence", &newComputeSequence },
1342         {"MxComputeIterate", &newComputeIterate },
1343         {"MxComputeOnce", &newComputeOnce },
1344         {"MxComputeNewtonRaphson", &newComputeNewtonRaphson},
1345         {"MxComputeEM", &newComputeEM },
1346         {"MxComputeStandardError", &newComputeStandardError},
1347         {"MxComputeHessianQuality", &newComputeHessianQuality},
1348         {"MxComputeReportDeriv", &newComputeReportDeriv},
1349         {"MxComputeConfidenceInterval", &newComputeConfidenceInterval}
1350 };
1351
1352 omxCompute *omxNewCompute(omxState* os, const char *type)
1353 {
1354         omxCompute *got = NULL;
1355
1356         for (size_t fx=0; fx < OMX_STATIC_ARRAY_SIZE(omxComputeTable); fx++) {
1357                 const struct omxComputeTableEntry *entry = omxComputeTable + fx;
1358                 if(strcmp(type, entry->name) == 0) {
1359                         got = entry->ctor();
1360                         got->name = entry->name;
1361                         break;
1362                 }
1363         }
1364
1365         if (!got) Rf_error("Compute %s is not implemented", type);
1366
1367         return got;
1368 }
1369
1370 void omxComputeSequence::initFromFrontend(SEXP rObj)
1371 {
1372         super::initFromFrontend(rObj);
1373
1374         SEXP slotValue;
1375         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("steps")));
1376
1377         for (int cx = 0; cx < Rf_length(slotValue); cx++) {
1378                 SEXP step = VECTOR_ELT(slotValue, cx);
1379                 SEXP s4class;
1380                 Rf_protect(s4class = STRING_ELT(Rf_getAttrib(step, Rf_install("class")), 0));
1381                 omxCompute *compute = omxNewCompute(globalState, CHAR(s4class));
1382                 compute->initFromFrontend(step);
1383                 if (isErrorRaised(globalState)) break;
1384                 clist.push_back(compute);
1385         }
1386 }
1387
1388 void omxComputeSequence::computeImpl(FitContext *fc)
1389 {
1390         for (size_t cx=0; cx < clist.size(); ++cx) {
1391                 clist[cx]->compute(fc);
1392                 if (isErrorRaised(globalState)) break;
1393         }
1394 }
1395
1396 omxComputeSequence::~omxComputeSequence()
1397 {
1398         for (size_t cx=0; cx < clist.size(); ++cx) {
1399                 delete clist[cx];
1400         }
1401 }
1402
1403 void omxComputeIterate::initFromFrontend(SEXP rObj)
1404 {
1405         SEXP slotValue;
1406
1407         super::initFromFrontend(rObj);
1408
1409         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("maxIter")));
1410         maxIter = INTEGER(slotValue)[0];
1411
1412         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("tolerance")));
1413         tolerance = REAL(slotValue)[0];
1414         if (tolerance <= 0) Rf_error("tolerance must be positive");
1415
1416         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("steps")));
1417
1418         for (int cx = 0; cx < Rf_length(slotValue); cx++) {
1419                 SEXP step = VECTOR_ELT(slotValue, cx);
1420                 SEXP s4class;
1421                 Rf_protect(s4class = STRING_ELT(Rf_getAttrib(step, Rf_install("class")), 0));
1422                 omxCompute *compute = omxNewCompute(globalState, CHAR(s4class));
1423                 compute->initFromFrontend(step);
1424                 if (isErrorRaised(globalState)) break;
1425                 clist.push_back(compute);
1426         }
1427
1428         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("verbose")));
1429         verbose = Rf_asInteger(slotValue);
1430 }
1431
1432 void omxComputeIterate::computeImpl(FitContext *fc)
1433 {
1434         int iter = 0;
1435         double prevFit = 0;
1436         double mac = tolerance * 10;
1437         while (1) {
1438                 ++fc->iterations;
1439                 for (size_t cx=0; cx < clist.size(); ++cx) {
1440                         clist[cx]->compute(fc);
1441                         if (isErrorRaised(globalState)) break;
1442                 }
1443                 if (fc->wanted & FF_COMPUTE_MAXABSCHANGE) {
1444                         if (fc->mac < 0) {
1445                                 Rf_warning("MAC estimated at %.4f; something is wrong", fc->mac);
1446                                 break;
1447                         } else {
1448                                 mac = fc->mac;
1449                                 if (verbose) mxLog("ComputeIterate: mac %.9g", mac);
1450                         }
1451                 }
1452                 if (fc->wanted & FF_COMPUTE_FIT) {
1453                         if (fc->fit == 0) {
1454                                 Rf_warning("Fit estimated at 0; something is wrong");
1455                                 break;
1456                         }
1457                         if (prevFit != 0) {
1458                                 double change = prevFit - fc->fit;
1459                                 if (verbose) mxLog("ComputeIterate: fit %.9g change %.9g", fc->fit, change);
1460                                 mac = fabs(change);
1461                         } else {
1462                                 if (verbose) mxLog("ComputeIterate: initial fit %.9g", fc->fit);
1463                         }
1464                         prevFit = fc->fit;
1465                 }
1466                 if (!(fc->wanted & (FF_COMPUTE_MAXABSCHANGE | FF_COMPUTE_FIT))) {
1467                         omxRaiseErrorf("ComputeIterate: neither MAC nor fit available");
1468                 }
1469                 if (isErrorRaised(globalState) || ++iter > maxIter || mac < tolerance) break;
1470         }
1471 }
1472
1473 omxComputeIterate::~omxComputeIterate()
1474 {
1475         for (size_t cx=0; cx < clist.size(); ++cx) {
1476                 delete clist[cx];
1477         }
1478 }
1479
1480 void ComputeEM::initFromFrontend(SEXP rObj)
1481 {
1482         SEXP slotValue;
1483         SEXP s4class;
1484
1485         super::initFromFrontend(rObj);
1486
1487         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("expectation")));
1488         for (int wx=0; wx < Rf_length(slotValue); ++wx) {
1489                 int objNum = INTEGER(slotValue)[wx];
1490                 omxExpectation *expectation = globalState->expectationList[objNum];
1491                 setFreeVarGroup(expectation, varGroup);
1492                 omxCompleteExpectation(expectation);
1493                 expectations.push_back(expectation);
1494         }
1495
1496         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("predict")));
1497         {
1498                 // Should accept a vector here TODO
1499                 if (Rf_length(slotValue) != 1) Rf_error("Not implemented");
1500                 SEXP elem;
1501                 Rf_protect(elem = STRING_ELT(slotValue, 0));
1502                 predict = CHAR(elem);
1503         }
1504
1505         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("mstep")));
1506         Rf_protect(s4class = STRING_ELT(Rf_getAttrib(slotValue, Rf_install("class")), 0));
1507         fit1 = omxNewCompute(globalState, CHAR(s4class));
1508         fit1->initFromFrontend(slotValue);
1509
1510         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("observedFit")));
1511         fit3 = globalState->algebraList[ INTEGER(slotValue)[0] ];
1512         if (fit3->fitFunction) {
1513                 setFreeVarGroup(fit3->fitFunction, varGroup);
1514                 omxCompleteFitFunction(fit3);
1515         }
1516
1517         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("maxIter")));
1518         maxIter = INTEGER(slotValue)[0];
1519
1520         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("tolerance")));
1521         tolerance = REAL(slotValue)[0];
1522         if (tolerance <= 0) Rf_error("tolerance must be positive");
1523
1524         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("verbose")));
1525         verbose = Rf_asInteger(slotValue);
1526
1527         useRamsay = false;
1528         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("accel")));
1529         const char *accelName = CHAR(STRING_ELT(slotValue, 0));
1530         if (strEQ(accelName, "ramsay1975")) {
1531                 useRamsay = true;
1532         } else if (STRING_ELT(slotValue, 0) == NA_STRING) {
1533                 // OK
1534         } else {
1535                 Rf_warning("%s: unknown acceleration method %s ignored", name, accelName);
1536         }
1537
1538         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("information")));
1539         const char *infoName = CHAR(STRING_ELT(slotValue, 0));
1540         information = false;
1541         if (STRING_ELT(slotValue, 0) == NA_STRING) {
1542                 // ok
1543         } else if (strEQ(infoName, "mr1991")) {
1544                 information = true;
1545         } else {
1546                 Rf_warning("%s: unknown information method %s ignored", name, infoName);
1547         }
1548
1549         if (information) {
1550                 infoMethod = INFO_METHOD_HESSIAN;
1551                 semMethod = AgileSEM;
1552                 agileMaxIter = 1;
1553                 semDebug = false;
1554                 semFixSymmetry = true;
1555                 semForcePD = false;
1556                 noiseTarget = exp(-5.0); //constexpr
1557                 noiseTolerance = exp(3.3); //constexpr
1558                 semTolerance = sqrt(tolerance);  // override needed?
1559
1560                 SEXP infoArgs, argNames;
1561                 Rf_protect(infoArgs = R_do_slot(rObj, Rf_install("infoArgs")));
1562                 Rf_protect(argNames = Rf_getAttrib(infoArgs, R_NamesSymbol));
1563
1564                 for (int ax=0; ax < Rf_length(infoArgs); ++ax) {
1565                         const char *key = R_CHAR(STRING_ELT(argNames, ax));
1566                         slotValue = VECTOR_ELT(infoArgs, ax);
1567                         if (strEQ(key, "fitfunction")) {
1568                                 for (int fx=0; fx < Rf_length(slotValue); ++fx) {
1569                                         omxMatrix *ff = globalState->algebraList[INTEGER(slotValue)[fx]];
1570                                         if (!ff->fitFunction) Rf_error("infoArgs$fitfunction is %s, not a fitfunction", ff->name);
1571                                         semFitFunction.push_back(ff);
1572                                 }
1573                         } else if (strEQ(key, "inputInfo")) {
1574                                 infoMethod = stringToInfoMethod(CHAR(STRING_ELT(slotValue, 0)));
1575                         } else if (strEQ(key, "semMethod")) {
1576                                 semMethodLen = Rf_length(slotValue);
1577                                 if (semMethodLen == 0) {
1578                                         semMethod = AgileSEM;
1579                                         semMethodData = NULL;
1580                                 } else {
1581                                         semMethodData = REAL(slotValue);
1582                                         if (semMethodLen > 1) {
1583                                                 semMethod = GridSEM;
1584                                         } else if (semMethodData[0] == 1) {
1585                                                 semMethod = ClassicSEM;
1586                                         } else if (semMethodData[0] == 2) {
1587                                                 semMethod = TianSEM;
1588                                         } else if (semMethodData[0] == 3) {
1589                                                 semMethod = AgileSEM;
1590                                         } else {
1591                                                 Rf_error("Unknown SEM method %f", semMethodData[0]);
1592                                         }
1593                                 }
1594                         } else if (strEQ(key, "agileMaxIter")) {
1595                                 agileMaxIter = INTEGER(slotValue)[0];
1596                         } else if (strEQ(key, "semDebug")) {
1597                                 semDebug = Rf_asLogical(slotValue);
1598                         } else if (strEQ(key, "semFixSymmetry")) {
1599                                 semFixSymmetry = Rf_asLogical(slotValue);
1600                         } else if (strEQ(key, "semForcePD")) {
1601                                 semForcePD = Rf_asLogical(slotValue);
1602                         } else if (strEQ(key, "noiseTarget")) {
1603                                 noiseTarget = REAL(slotValue)[0];
1604                                 if (noiseTarget <= 0) Rf_error("noiseTarget must be positive");
1605                         } else if (strEQ(key, "noiseTolerance")) {
1606                                 noiseTolerance = REAL(slotValue)[0];
1607                                 if (noiseTolerance < 1) Rf_error("noiseTolerance must be >=1");
1608                         } else {
1609                                 mxLog("%s: unknown key %s", name, key);
1610                         }
1611                 }
1612                 if (!semFixSymmetry && semForcePD) {
1613                         Rf_warning("%s: semFixSymmetry must be enabled for semForcePD", name);
1614                         semForcePD = false;
1615                 }
1616         }
1617
1618         inputInfoMatrix = NULL;
1619         outputInfoMatrix = NULL;
1620         rateMatrix = NULL;
1621         origEigenvalues = NULL;
1622 }
1623
1624 void ComputeEM::setExpectationPrediction(const char *context)
1625 {
1626         for (size_t wx=0; wx < expectations.size(); ++wx) {
1627                 omxExpectation *expectation = expectations[wx];
1628                 if (verbose >= 4) mxLog("ComputeEM: expectation[%d] %s predict %s", (int) wx, expectation->name, context);
1629                 omxExpectationCompute(expectation, context);
1630         }
1631 }
1632
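// ComputeEM::probeEM perturbs free parameter vx away from the EM solution
// (optimum) by the given offset, runs one M-step via fit1, and records how far
// every free parameter moved per unit of offset.  Each probe therefore fills
// one row of rijWork with a numerical estimate of a row of the EM map's
// Jacobian (the SEM rate matrix).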
1633 void ComputeEM::probeEM(FitContext *fc, int vx, double offset, std::vector<double> *rijWork)
1634 {
1635         const size_t freeVars = fc->varGroup->vars.size();
1636         const int base = paramHistLen[vx] * freeVars;
1637         probeOffset[vx * maxHistLen + paramHistLen[vx]] = offset;
1638         paramHistLen[vx] += 1;
1639
1640         memcpy(fc->est, optimum.data(), sizeof(double) * freeVars);
1641         fc->est[vx] += offset;
1642         fc->copyParamToModel(globalState);
1643
1644         setExpectationPrediction(predict);
1645         int informSave = fc->inform;  // not sure if we want to hide inform here TODO
1646         fit1->compute(fc);
1647         fc->inform = informSave;
1648         setExpectationPrediction("nothing");
1649
1650         if (verbose >= 3) mxLog("ComputeEM: probe %d of param %d offset %.6f",
1651                                 paramHistLen[vx], vx, offset);
1652
1653         for (size_t v1=0; v1 < freeVars; ++v1) {
1654                 double got = (fc->est[v1] - optimum[v1]) / offset;
1655                 (*rijWork)[base + v1] = got;
1656         }
1657         //pda(rij.data() + base, 1, freeVars);
1658         ++semProbeCount;
1659 }
1660
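// ComputeEM::recordDiff compares the two most recent probes of parameter v1.
// mengOK is set when every element of the two rate estimates agrees to within
// semTolerance; stdDiff is the mean absolute element-wise difference,
// standardized by the distance between the two probe offsets.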
1661 void ComputeEM::recordDiff(FitContext *fc, int v1, std::vector<double> &rijWork,
1662                            double *stdDiff, bool *mengOK)
1663 {
1664         const size_t freeVars = fc->varGroup->vars.size();
1665         int h1 = paramHistLen[v1]-2;
1666         int h2 = paramHistLen[v1]-1;
1667         double *rij1 = rijWork.data() + h1 * freeVars;
1668         double *rij2 = rijWork.data() + h2 * freeVars;
1669         double diff = 0;
1670         *mengOK = true;
1671         for (size_t v2=0; v2 < freeVars; ++v2) {
1672                 double diff1 = fabs(rij1[v2] - rij2[v2]);
1673                 if (diff1 >= semTolerance) *mengOK = false;
1674                 diff += diff1;
1675         }
1676         double p1 = probeOffset[v1 * maxHistLen + h1];
1677         double p2 = probeOffset[v1 * maxHistLen + h2];
1678         double dist = fabs(p1 - p2);
1679         if (dist < tolerance/4) Rf_error("SEM: invalid probe offset distance %.9f", dist);
1680         *stdDiff = diff / (freeVars * dist);
1681         diffWork[v1 * maxHistLen + h1] = *stdDiff;
1682         if (verbose >= 2) mxLog("ComputeEM: (%f,%f) mengOK %d diff %f stdDiff %f",
1683                                 p1, p2, *mengOK, diff / freeVars, *stdDiff);
1684 }
1685
1686 void ComputeEM::computeImpl(FitContext *fc)
1687 {
1688         const double Scale = fabs(Global->llScale);
1689         double prevFit = 0;
1690         double mac = tolerance * 10;
1691         bool converged = false;
1692         const size_t freeVars = fc->varGroup->vars.size();
1693         bool in_middle = false;
1694         maxHistLen = 0;
1695         EMcycles = 0;
1696         semProbeCount = 0;
1697
1698         if (verbose >= 1) mxLog("ComputeEM: Welcome, tolerance=%g ramsay=%d info=%d",
1699                                 tolerance, useRamsay, information);
1700
1701         // Some evidence suggests that better performance is obtained when the
1702         // item parameters and latent distribution parameters are split into
1703         // separate Ramsay1975 groups with different minimum caution limits; for example:
1704         //
1705         // ramsay.push_back(new Ramsay1975(fc, 1+int(ramsay.size()), 0, verbose, -1.25)); // M-step param
1706         // ramsay.push_back(new Ramsay1975(fc, 1+int(ramsay.size()), 0, verbose, -1));    // extra param
1707         //
1708         // I had this hardcoded for a while, but in making the API more generic,
1709         // I'm not sure how to allow specification of the Ramsay1975 grouping.
1710         // One possibility is list(flavor1=c("ItemParam"), flavor2=c("mean","cov"))
1711         // but this doesn't allow finer grain than matrix-wise assignment. The
1712         // other question is whether more Ramsay1975 groups really help or not.
1713         // nightly/ifa-cai2009.R actually got faster with 1 Ramsay group.
1714
1715         const char *flavor = "EM";
1716         fc->flavor.assign(freeVars, flavor);
1717         ramsay.push_back(new Ramsay1975(fc, flavor, verbose, -1.25));
1718
1719         while (1) {
1720                 if (verbose >= 4) mxLog("ComputeEM[%d]: E-step", EMcycles);
1721                 setExpectationPrediction(predict);
1722
1723                 {
1724                         if (verbose >= 4) mxLog("ComputeEM[%d]: M-step", EMcycles);
1725                         int informSave = fc->inform;
1726                         FitContext *fc1 = new FitContext(fc, fit1->varGroup);
1727                         int startIter = fc1->iterations;
1728                         fit1->compute(fc1);
1729                         mstepIter = fc1->iterations - startIter;
1730                         fc1->updateParentAndFree();
1731                         fc->inform = informSave;
1732                 }
1733
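                // Ramsay (1975) acceleration: the extrapolation factor is
                // recalibrated every third EM cycle, and the accelerated
                // parameter update is applied before re-evaluating the
                // observed fit below.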
1734                 {
1735                         if (useRamsay) {
1736                                 bool wantRestart;
1737                                 if (EMcycles > 3 && EMcycles % 3 == 0) {
1738                                         for (size_t rx=0; rx < ramsay.size(); ++rx) {
1739                                                 ramsay[rx]->recalibrate(&wantRestart);
1740                                         }
1741                                 }
1742                                 for (size_t rx=0; rx < ramsay.size(); ++rx) {
1743                                         ramsay[rx]->apply();
1744                                 }
1745                         }
1746                         fc->copyParamToModel(globalState);
1747                         if (verbose >= 4) mxLog("ComputeEM[%d]: observed fit", EMcycles);
1748                         setExpectationPrediction("nothing");
1749                         Global->checkpointPrefit(fc, fc->est, false);
1750                         omxForceCompute(fit3);
1751                         fc->fit = fit3->data[0];
1752                         Global->checkpointPostfit(fc);
1753                 }
1754
1755                 totalMstepIter += mstepIter;
1756
1757                 if (!(fc->wanted & FF_COMPUTE_FIT)) {
1758                         omxRaiseErrorf("ComputeEM: fit not available");
1759                         break;
1760                 }
1761                 if (fc->fit == 0) {
1762                         omxRaiseErrorf("Fit estimated at 0; something is wrong");
1763                         break;
1764                 }
1765                 double change = 0;
1766                 if (prevFit != 0) {
1767                         change = prevFit - fc->fit;
1768                         if (verbose >= 2) mxLog("ComputeEM[%d]: msteps %d fit %.9g change %.9g",
1769                                                 EMcycles, mstepIter, fc->fit, change);
1770                         mac = fabs(change);
1771                         if (mac < MIDDLE_START * Scale) in_middle = true;
1772                         if (mac < MIDDLE_END * Scale) in_middle = false;
1773                 } else {
1774                         if (verbose >= 2) mxLog("ComputeEM: msteps %d initial fit %.9g",
1775                                                 mstepIter, fc->fit);
1776                 }
1777
1778                 prevFit = fc->fit;
1779                 converged = mac < tolerance;
1780                 ++fc->iterations;
1781                 if (isErrorRaised(globalState) || ++EMcycles > maxIter || converged) break;
1782
1783                 if (semMethod == ClassicSEM || ((semMethod == TianSEM || semMethod == AgileSEM) && in_middle)) {
1784                         double *estCopy = new double[freeVars];
1785                         memcpy(estCopy, fc->est, sizeof(double) * freeVars);
1786                         estHistory.push_back(estCopy);
1787                 }
1788         }
1789
1790         int wanted = FF_COMPUTE_FIT | FF_COMPUTE_BESTFIT | FF_COMPUTE_ESTIMATE;
1791         fc->wanted = wanted;
1792         bestFit = fc->fit;
1793         if (verbose >= 1) mxLog("ComputeEM: cycles %d/%d total mstep %d fit %f",
1794                                 EMcycles, maxIter, totalMstepIter, bestFit);
1795
1796         if (!converged || !information) return;
1797
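        // What follows is the supplemented-EM (SEM) information matrix of
        // Meng & Rubin (1991), selected above as information method "mr1991":
        // each free parameter is probed around the EM solution and mapped
        // through one M-step to estimate the EM rate matrix R (rij), and the
        // observed information is then formed as
        //
        //   I_obs = I_complete * (I - R)
        //
        // where I_complete is the complete-data information computed further
        // below by the fit functions listed in semFitFunction.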
1798         if (verbose >= 1) mxLog("ComputeEM: tolerance=%f semMethod=%d, semTolerance=%f ideal noise=[%f,%f]",
1799                                 tolerance, semMethod, semTolerance,
1800                                 noiseTarget/noiseTolerance, noiseTarget*noiseTolerance);
1801
1802         optimum.resize(freeVars);
1803         memcpy(optimum.data(), fc->est, sizeof(double) * freeVars);
1804
1805         if (semMethod == AgileSEM) {
1806                 maxHistLen = 2 + agileMaxIter * 2;
1807         } else if (semMethod == ClassicSEM || semMethod == TianSEM) {
1808                 maxHistLen = estHistory.size();
1809         } else {
1810                 maxHistLen = semMethodLen;
1811         }
1812
1813         probeOffset.resize(maxHistLen * freeVars);
1814         diffWork.resize(maxHistLen * freeVars);
1815         paramHistLen.assign(freeVars, 0);
1816         omxBuffer<double> rij(freeVars * freeVars);
1817
1818         size_t semConverged=0;
1819         for (size_t v1=0; v1 < freeVars; ++v1) {
1820                 std::vector<double> rijWork(freeVars * maxHistLen);
1821                 int pick = 0;
1822                 bool paramConverged = false;
1823                 if (semMethod == AgileSEM) {
1824                         const double stepSize = tolerance;
1825
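                        // Agile SEM: pick an initial probe offset (from the EM
                        // history when available), then rescale it until the
                        // measured probe noise falls inside
                        // [noiseTarget/noiseTolerance, noiseTarget*noiseTolerance].
                        // The noise is modeled as coef/offset^2, hence
                        // offset1 = sqrt(coef/noiseTarget) in the loop below.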
1826                         double offset1 = tolerance * 50;
1827                         double sign = 1;
1828                         if (estHistory.size()) {
1829                                 int hpick = estHistory.size() /2;
1830                                 double popt = optimum[v1];
1831                                 sign = (popt < estHistory[hpick][v1])? 1 : -1;
1832                                 offset1 = fabs(estHistory[hpick][v1] - popt);
1833                                 if (offset1 < 10 * tolerance) offset1 = 10 * tolerance;
1834                                 if (offset1 > 1000 * tolerance) offset1 = 1000 * tolerance;
1835                         }
1836
1837                         probeEM(fc, v1, sign * offset1, &rijWork);
1838                         double offset2 = offset1 + stepSize;
1839                         probeEM(fc, v1, sign * offset2, &rijWork);
1840                         double diff;
1841                         bool mengOK;
1842                         recordDiff(fc, v1, rijWork, &diff, &mengOK);
1843                         double midOffset = (offset1 + offset2) / 2;
1844
1845                         int iter = 0;
1846                         omxBuffer<double> coefHist(agileMaxIter);
1847                         while (++iter <= agileMaxIter &&
1848                                !(noiseTarget/noiseTolerance < diff && diff < noiseTarget*noiseTolerance)) {
1849                                 coefHist[iter-1] = diff * midOffset * midOffset;
1850                                 double coef = 0;
1851                                 for (int cx=0; cx < iter; ++cx) coef += coefHist[cx];
1852                                 coef /= iter;
1853                                 if (verbose >= 4) mxLog("ComputeEM: agile iter[%d] coef=%.6g", iter, coef);
1854                                 offset1 = sqrt(coef/noiseTarget);
1855                                 probeEM(fc, v1, sign * offset1, &rijWork);
1856                                 if (iter < agileMaxIter || semDebug) {
1857                                         offset2 = offset1 + stepSize;
1858                                         probeEM(fc, v1, sign * offset2, &rijWork);
1859                                         midOffset = (offset1 + offset2) / 2;
1860                                         recordDiff(fc, v1, rijWork, &diff, &mengOK);
1861                                 }
1862                                 pick += 2;
1863                         }
1864                         paramConverged = true;
1865                 } else if (semMethod == ClassicSEM || semMethod == TianSEM) {
1866                         if (!estHistory.size()) {
1867                                 if (verbose >= 1) mxLog("ComputeEM: no history available;"
1868                                                         " the Classic and Tian SEM methods require a convergence history");
1869                                 return;
1870                         }
1871                         for (size_t hx=0; hx < estHistory.size(); ++hx) {
1872                                 double popt = optimum[v1];
1873                                 double offset1 = estHistory[hx][v1] - popt;
1874                                 if (paramHistLen[v1] && fabs(probeOffset[v1 * maxHistLen + paramHistLen[v1]-1] -
1875                                                              offset1) < tolerance) continue;
1876                                 if (fabs(offset1) < tolerance) continue;
1877                                 probeEM(fc, v1, offset1, &rijWork);
1878                                 if (hx == 0) continue;
1879                                 pick = hx;
1880                                 double diff;
1881                                 bool mengOK;
1882                                 recordDiff(fc, v1, rijWork, &diff, &mengOK);
1883                                 if (mengOK) {
1884                                         paramConverged = true;
1885                                         break;
1886                                 }
1887                         }
1888                 } else {
1889                         for (int hx=0; hx < semMethodLen; ++hx) {
1890                                 probeEM(fc, v1, semMethodData[hx], &rijWork);
1891                                 if (hx == 0) continue;
1892                                 double diff;
1893                                 bool mengOK;
1894                                 recordDiff(fc, v1, rijWork, &diff, &mengOK);
1895                         }
1896                         paramConverged = true;
1897                 }
1898
1899                 if (paramConverged) {
1900                         ++semConverged;
1901                         memcpy(rij.data() + v1 * freeVars, rijWork.data() + pick*freeVars, sizeof(double) * freeVars);
1902                         if (verbose >= 2) mxLog("ComputeEM: param %d converged in %d probes",
1903                                                 (int) v1, paramHistLen[v1]);
1904                 } else {
1905                         if (verbose >= 2) mxLog("ComputeEM: param %d failed to converge after %d probes",
1906                                                 (int) v1, paramHistLen[v1]);
1907                         break;
1908                 }
1909         }
1910
1911         if (verbose >= 1) {
1912                 if (semConverged == freeVars) {
1913                         mxLog("ComputeEM: %d probes used to estimate Hessian", semProbeCount);
1914                 } else {
1915                         mxLog("ComputeEM: %d probes used for SEM but failed to converge", semProbeCount);
1916                 }
1917         }
1918         if (semConverged < freeVars) return;
1919
1920         fc->fit = bestFit;
1921         memcpy(fc->est, optimum.data(), sizeof(double) * freeVars);
1922         fc->copyParamToModel(globalState);
1923
1924         if (semDebug) {
1925                 Rf_protect(rateMatrix = Rf_allocMatrix(REALSXP, freeVars, freeVars));
1926                 memcpy(REAL(rateMatrix), rij.data(), sizeof(double) * freeVars * freeVars);
1927         }
1928
1929         // rij = I-rij
1930         for (size_t v1=0; v1 < freeVars; ++v1) {
1931                 for (size_t v2=0; v2 < freeVars; ++v2) {
1932                         int cell = v1 * freeVars + v2;
1933                         double entry = rij[cell];
1934                         if (v1 == v2) entry = 1 - entry;
1935                         else entry = -entry;
1936                         rij[cell] = entry;
1937                 }
1938         }
1939
1940         //mxLog("rij symm");
1941         //pda(rij.data(), freeVars, freeVars);
1942
1943         setExpectationPrediction(predict);
1944         fc->wanted = 0;
1945         fc->infoMethod = infoMethod;
1946         fc->preInfo();
1947         for (size_t fx=0; fx < semFitFunction.size(); ++fx) {
1948                 omxFitFunctionCompute(semFitFunction[fx]->fitFunction, FF_COMPUTE_INFO, fc);
1949         }
1950         fc->postInfo();
1951         setExpectationPrediction("nothing");
1952
1953         Rf_protect(inputInfoMatrix = Rf_allocMatrix(REALSXP, freeVars, freeVars));
1954         double *hess = REAL(inputInfoMatrix);
1955         fc->copyDenseHess(hess);
1956
1957         Matrix rijMat(rij.data(), freeVars, freeVars);
1958         Matrix hessMat(hess, freeVars, freeVars);
1959         omxBuffer<double> infoBuf(freeVars * freeVars);
1960         Matrix infoMat(infoBuf.data(), freeVars, freeVars);
1961
1962         SymMatrixMultiply('L', 'U', 1, 0, hessMat, rijMat, infoMat);  // result not symmetric!
1963
1964         double *ihess = fc->getDenseIHessUninitialized();
1965         int singular;
1966         if (semFixSymmetry) {
1967                 MeanSymmetric(infoMat);
1968                 singular = InvertSymmetricIndef(infoMat, 'U');
1969                 memcpy(ihess, infoBuf.data(), sizeof(double) * freeVars * freeVars);
1970         } else {
1971                 Matrix ihessMat(ihess, freeVars, freeVars);
1972                 singular = MatrixSolve(infoMat, ihessMat, true);
1973         }
1974         if (singular) {
1975                 if (verbose >= 1) mxLog("ComputeEM: SEM Hessian is singular %d", singular);
1976                 return;
1977         }
1978
1979         if (semForcePD) {
1980                 double *oev = NULL;
1981                 if (semDebug) {
1982                         Rf_protect(origEigenvalues = Rf_allocVector(REALSXP, freeVars));
1983                         oev = REAL(origEigenvalues);
1984                 }
1985                 Matrix mat(ihess, freeVars, freeVars);
1986                 InplaceForcePosSemiDef(mat, oev, &fc->infoCondNum);
1987         }
1988
1989         if (semDebug) {
1990                 // fc's ihess storage is treated as symmetric; the SEM-derived inverse information saved here may be asymmetric
1991                 Rf_protect(outputInfoMatrix = Rf_allocMatrix(REALSXP, freeVars, freeVars));
1992                 memcpy(REAL(outputInfoMatrix), ihess, sizeof(double) * freeVars * freeVars);
1993         }
1994
1995         fc->wanted = wanted | FF_COMPUTE_IHESSIAN;
1996         //pda(fc->ihess, freeVars, freeVars);
1997 }
1998
1999 void ComputeEM::collectResults(FitContext *fc, LocalComputeResult *lcr, MxRList *out)
2000 {
2001         super::collectResults(fc, lcr, out);
2002
2003         std::vector< omxCompute* > clist(1);
2004         clist[0] = fit1;
2005
2006         collectResultsHelper(fc, clist, lcr, out);
2007 }
2008
2009 void ComputeEM::reportResults(FitContext *fc, MxRList *slots, MxRList *)
2010 {
2011         size_t numFree = fc->varGroup->vars.size();
2012         if (!numFree) return;
2013
2014         MxRList out;
2015         out.add("EMcycles", Rf_ScalarInteger(EMcycles));
2016         out.add("totalMstep", Rf_ScalarInteger(totalMstepIter));
2017         out.add("semProbeCount", Rf_ScalarInteger(semProbeCount));
2018         slots->add("output", out.asR());
2019
2020         if (semDebug) {
2021                 const int freeVars = (int) fc->varGroup->vars.size();
2022                 MxRList dbg;
2023
2024                 if (probeOffset.size()) {
2025                         SEXP Rpo;
2026                         Rf_protect(Rpo = Rf_allocMatrix(REALSXP, maxHistLen, freeVars));
2027                         memcpy(REAL(Rpo), probeOffset.data(), sizeof(double) * maxHistLen * freeVars);
2028                         dbg.add("probeOffset", Rpo);
2029                 }
2030
2031                 if (diffWork.size()) {
2032                         SEXP Rdiff;
2033                         Rf_protect(Rdiff = Rf_allocMatrix(REALSXP, maxHistLen, freeVars));
2034                         memcpy(REAL(Rdiff), diffWork.data(), sizeof(double) * maxHistLen * freeVars);
2035                         dbg.add("semDiff", Rdiff);
2036                 }
2037
2038                 if (paramHistLen.size()) {
2039                         SEXP Rphl;
2040                         Rf_protect(Rphl = Rf_allocVector(INTSXP, freeVars));
2041                         memcpy(INTEGER(Rphl), paramHistLen.data(), sizeof(int) * freeVars);
2042                         dbg.add("paramHistLen", Rphl);
2043                 }
2044
2045                 if (inputInfoMatrix) dbg.add("inputInfo", inputInfoMatrix);
2046                 if (outputInfoMatrix) dbg.add("outputInfo", outputInfoMatrix);
2047                 if (rateMatrix) dbg.add("rateMatrix", rateMatrix);
2048                 if (origEigenvalues) dbg.add("origEigenvalues", origEigenvalues);
2049
2050                 slots->add("debug", dbg.asR());
2051         }
2052 }
2053
2054 ComputeEM::~ComputeEM()
2055 {
2056         for (size_t rx=0; rx < ramsay.size(); ++rx) {
2057                 delete ramsay[rx];
2058         }
2059         ramsay.clear();
2060
2061         delete fit1;
2062
2063         for (size_t hx=0; hx < estHistory.size(); ++hx) {
2064                 delete [] estHistory[hx];
2065         }
2066         estHistory.clear();
2067 }
2068
2069 enum ComputeInfoMethod omxCompute::stringToInfoMethod(const char *iMethod)
2070 {
2071         enum ComputeInfoMethod infoMethod = INFO_METHOD_DEFAULT; // to avoid gcc warning
2072         if (strcmp(iMethod, "sandwich")==0) {
2073                 infoMethod = INFO_METHOD_SANDWICH;
2074         } else if (strcmp(iMethod, "meat")==0) {
2075                 infoMethod = INFO_METHOD_MEAT;
2076         } else if (strcmp(iMethod, "bread")==0) {
2077                 infoMethod = INFO_METHOD_BREAD;
2078         } else if (strcmp(iMethod, "hessian")==0) {
2079                 infoMethod = INFO_METHOD_HESSIAN;
2080         } else {
2081                 Rf_error("Unknown information matrix estimation method '%s'", iMethod);
2082         }
2083         return infoMethod;
2084 }
2085
2086 void omxComputeOnce::initFromFrontend(SEXP rObj)
2087 {
2088         super::initFromFrontend(rObj);
2089
2090         SEXP slotValue;
2091         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("from")));
2092         for (int wx=0; wx < Rf_length(slotValue); ++wx) {
2093                 int objNum = INTEGER(slotValue)[wx];
2094                 if (objNum >= 0) {
2095                         omxMatrix *algebra = globalState->algebraList[objNum];
2096                         if (algebra->fitFunction) {
2097                                 setFreeVarGroup(algebra->fitFunction, varGroup);
2098                                 omxCompleteFitFunction(algebra);
2099                         }
2100                         algebras.push_back(algebra);
2101                 } else {
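                        // Negative entries refer to expectations; ~objNum
                        // recovers the index into expectationList.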
2102                         omxExpectation *expectation = globalState->expectationList[~objNum];
2103                         setFreeVarGroup(expectation, varGroup);
2104                         omxCompleteExpectation(expectation);
2105                         expectations.push_back(expectation);
2106                 }
2107         }
2108
2109         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("verbose")));
2110         verbose = Rf_asInteger(slotValue);
2111
2112         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("what")));
2113         int whatLen = Rf_length(slotValue);
2114         if (algebras.size()) {
2115                 for (int wx=0; wx < whatLen; ++wx) {
2116                         SEXP elem;
2117                         Rf_protect(elem = STRING_ELT(slotValue, wx));
2118                         const char *what = CHAR(elem);
2119                         if      (strcmp(what, "maxAbsChange")==0) mac = true;
2120                         else if (strcmp(what, "set-starting-values")==0) starting = true;
2121                         else if (strcmp(what, "fit")         ==0) fit = true;
2122                         else if (strcmp(what, "gradient")    ==0) gradient = true;
2123                         else if (strcmp(what, "hessian")     ==0) hessian = true;
2124                         else if (strcmp(what, "information") ==0) infoMat = true;
2125                         else if (strcmp(what, "ihessian")    ==0) ihessian = true;
2126                         else omxRaiseErrorf("mxComputeOnce: don't know how to compute %s", what);
2127                 }
2128
2129                 if (hessian && infoMat) Rf_error("Cannot compute the Hessian and Fisher Information matrix simultaneously");
2130         } else {
2131                 for (int wx=0; wx < whatLen; ++wx) {
2132                         SEXP elem;
2133                         Rf_protect(elem = STRING_ELT(slotValue, wx));
2134                         predict.push_back(CHAR(elem));
2135                 }
2136         }
2137
2138         Rf_protect(slotValue = R_do_slot(rObj, Rf_install(".is.bestfit")));
2139         isBestFit = Rf_asLogical(slotValue);
2140
2141         bool howConflict = false;
2142         Rf_protect(slotValue = R_do_slot(rObj, Rf_install("how")));
2143         if (Rf_length(slotValue) > 1) {
2144                 omxRaiseErrorf("mxComputeOnce: more than one method specified");
2145         } else if (Rf_length(slotValue) == 1) {
2146                 SEXP elem;
2147                 Rf_protect(elem = STRING_ELT(slotValue, 0));
2148                 if (algebras.size()) {
2149                         const char *iMethod = CHAR(elem);
2150                         if (infoMat) {
2151                                 infoMethod = stringToInfoMethod(iMethod);
2152                                 if (infoMethod == INFO_METHOD_MEAT && gradient && whatLen == 2) {
2153                                         //OK
2154                                 } else if (whatLen > 1) {
2155                                         howConflict = true;
2156                                 }
2157                         } else {
2158                                 omxRaiseErrorf("mxComputeOnce: unknown method %s requested", iMethod);
2159                         }
2160                 } else {
2161                         how = CHAR(elem);
2162                         if (whatLen > 1) howConflict = true;
2163                 }
2164         }
2165         if (howConflict) {
2166                 omxRaiseErrorf("mxComputeOnce: when how is specified, you can only compute one thing at a time");
2167         }
2168
2169         if (algebras.size() == 1 && algebras[0]->fitFunction) {
2170                 omxFitFunction *ff = algebras[0]->fitFunction;
2171                 if (gradient && !ff->gradientAvailable) {
2172                         Rf_error("Gradient requested but not available");
2173                 }
2174                 if ((hessian || ihessian || hgprod) && !ff->hessianAvailable) {
2175                         // add a separate flag for hgprod TODO
2176                         Rf_error("Hessian requested but not available");
2177                 }
2178                 // add check for information TODO
2179         }
2180 }
2181
2182 void omxComputeOnce::computeImpl(FitContext *fc)
2183 {
2184         if (algebras.size()) {
2185                 int want = 0;
2186                 if (starting) {
2187                         want |= FF_COMPUTE_STARTING;
2188                 }
2189                 if (mac) {
2190                         want |= FF_COMPUTE_MAXABSCHANGE;
2191                         fc->mac = 0;
2192                 }
2193                 if (fit) {
2194                         want |= FF_COMPUTE_FIT;
2195                         if (isBestFit) want |= FF_COMPUTE_BESTFIT;
2196                         fc->fit = 0;
2197                 }
2198                 if (gradient) {
2199                         want |= FF_COMPUTE_GRADIENT;
2200                         fc->grad = Eigen::VectorXd::Zero(fc->numParam);
2201                 }
2202                 if (hessian) {
2203                         want |= FF_COMPUTE_HESSIAN;
2204                         fc->clearHessian();
2205                 }
2206                 if (infoMat) {
2207                         want |= FF_COMPUTE_INFO;
2208                         fc->infoMethod = infoMethod;
2209                         fc->grad = Eigen::VectorXd::Zero(fc->numParam);
2210                         fc->clearHessian();
2211                         fc->preInfo();
2212                 }
2213                 if (ihessian) {
2214                         want |= FF_COMPUTE_IHESSIAN;
2215                         fc->clearHessian();
2216                 }
2217                 if (!want) return;
2218
2219                 for (size_t wx=0; wx < algebras.size(); ++wx) {
2220                         omxMatrix *algebra = algebras[wx];
2221                         if (algebra->fitFunction) {
2222                                 omxFitFunctionCompute(algebra->fitFunction, FF_COMPUTE_PREOPTIMIZE, fc);
2223                                 omxFitFunctionCompute(algebra->fitFunction, want, fc);
2224                                 fc->fit = algebra->data[0];
2225                                 if (infoMat) {
2226                                         fc->postInfo();
2227                                 }
2228                         } else {
2229                                 omxForceCompute(algebra);
2230                         }
2231                 }
2232         } else if (expectations.size()) {
2233                 if (predict.size() > 1) Rf_error("Not implemented");
2234                 for (size_t wx=0; wx < expectations.size(); ++wx) {
2235                         omxExpectation *expectation = expectations[wx];
2236                         omxExpectationCompute(expectation, predict[0], how);
2237                 }
2238         }
2239 }
2240
2241 void omxComputeOnce::reportResults(FitContext *fc, MxRList *slots, MxRList *out)
2242 {
2243         if (algebras.size()==0 || algebras[0]->fitFunction == NULL) return;
2244
2245         omxMatrix *algebra = algebras[0];
2246         omxPopulateFitFunction(algebra, out);
2247 }
2248
2249 void ComputeStandardError::reportResults(FitContext *fc, MxRList *slots, MxRList *)
2250 {
2251         fc->allocStderrs();  // at least report NAs
2252
2253         const size_t numParams = fc->numParam;
2254
2255         Eigen::VectorXd ihessDiag(fc->ihessDiag());
2256
2257         const double scale = fabs(Global->llScale);
2258
2259         // This function calculates the standard errors from the Hessian matrix
2260         // sqrt(scale * diag(solve(hessian)))
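        // scale is |llScale| (e.g. 2 when the fit function is -2 log
        // likelihood), which converts the inverse Hessian of the scaled fit
        // into the sampling covariance of the estimates.  Parameters with a
        // non-positive diagonal entry keep the NA assigned by allocStderrs.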
2261
2262         for(size_t i = 0; i < numParams; i++) {
2263                 double got = ihessDiag[i];
2264                 if (got <= 0) continue;
2265                 fc->stderrs[i] = sqrt(scale * got);
2266         }
2267 }
2268
2269 /*
2270 Date: Fri, 3 Jan 2014 14:02:34 -0600
2271 From: Michael Hunter <mhunter@ou.edu>
2272
2273 Determining positive definiteness of matrix is typically done by
2274 trying the Cholesky decomposition.  If it fails, the matrix is not
2275 positive definite; if it passes, the matrix is.  The benefit of the
2276 Cholesky is that it's much faster and easier to compute than a set of
2277 eigenvalues.
2278
2279 The BLAS/LAPACK routine DTRCO quickly computes a good approximation to the
2280 reciprocal condition number of a triangular matrix.  Hand it the Cholesky
2281 (a triangular matrix) the rest is history.  I don't think we need the
2282 exact condition number as long as it's just for finding very
2283 ill-conditioned problems.  For the solution to a linear system of
2284 equations, if you really care about the difference in precision between
2285 1e-14 and 1e-11, then the exact condition number is needed.  Otherwise, the
2286 approximation is faster and equally useful.
2287 */
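//
// A minimal sketch of that approach (not used here; the routine below takes
// the eigenvalue route via dsyevx), assuming a dense column-major matrix `a`
// of order n and the LAPACK declarations available from R_ext/Lapack.h:
//
//   char uplo = 'U';
//   int info;
//   F77_CALL(dpotrf)(&uplo, &n, a, &n, &info);   // Cholesky; info > 0 => not positive definite
//   if (info == 0) {
//           char norm = '1', diag = 'N';
//           double rcond;
//           std::vector<double> work(3 * n);
//           std::vector<int> iwork(n);
//           F77_CALL(dtrcon)(&norm, &uplo, &diag, &n, a, &n, &rcond,
//                            work.data(), iwork.data(), &info);
//           // rcond estimates the reciprocal condition number of the
//           // triangular Cholesky factor; tiny values flag an
//           // ill-conditioned Hessian.
//   }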
2288 void ComputeHessianQuality::reportResults(FitContext *fc, MxRList *slots, MxRList *)
2289 {
2290         // See Luenberger & Ye (2008) Second Order Test (p. 190) and Condition Number (p. 239)
2291
2292         // memcmp is required here because NaN != NaN always
2293         if (fc->infoDefinite != NA_LOGICAL ||
2294             memcmp(&fc->infoCondNum, &NA_REAL, sizeof(double)) != 0) return; // already set elsewhere
2295
2296         int numParams = int(fc->varGroup->vars.size());
2297
2298         double *mat = fc->getDenseHessianish();
2299         omxBuffer<double> hessWork(numParams * numParams);
2300         memcpy(hessWork.data(), mat, sizeof(double) * numParams * numParams);
2301
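        // dsyevx is called twice: first with lwork = -1 as a workspace-size
        // query (the optimal size comes back in optWork), then again to
        // compute all eigenvalues (range 'A') without eigenvectors (jobz 'N').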
2302         char jobz = 'N';
2303         char range = 'A';
2304         char uplo = 'U';
2305         double abstol = 0;
2306         int m;
2307         omxBuffer<double> w(numParams);
2308         double optWork;
2309         int lwork = -1;
2310         omxBuffer<int> iwork(5 * numParams);
2311         int info;
2312         double realIgn = 0;
2313         int intIgn = 0;
2314         F77_CALL(dsyevx)(&jobz, &range, &uplo, &numParams, hessWork.data(),
2315                          &numParams, &realIgn, &realIgn, &intIgn, &intIgn, &abstol, &m, w.data(),
2316                          NULL, &numParams, &optWork, &lwork, iwork.data(), NULL, &info);
2317
2318         lwork = optWork;
2319         omxBuffer<double> work(lwork);
2320         F77_CALL(dsyevx)(&jobz, &range, &uplo, &numParams, hessWork.data(),
2321                          &numParams, &realIgn, &realIgn, &intIgn, &intIgn, &abstol, &m, w.data(),
2322                          NULL, &numParams, work.data(), &lwork, iwork.data(), NULL, &info);
2323         if (info < 0) {
2324                 Rf_error("dsyevx %d", info);
2325         } else if (info) {
2326                 return;
2327         }
2328
2329         bool definite = true;
2330         bool neg = w[0] < 0;
2331         for (int px=1; px < numParams; ++px) {
2332                 if ((w[px] < 0) ^ neg) {
2333                         definite = false;
2334                         break;
2335                 }
2336         }
2337
2338         fc->infoDefinite = definite;
2339
2340         if (definite) {
2341                 double ev[2] = { fabs(w[0]), fabs(w[numParams-1]) };
2342                 if (ev[0] < ev[1]) std::swap(ev[0], ev[1]);
2343                 double got = ev[0] / ev[1];
2344                 if (std::isfinite(got)) fc->infoCondNum = got;
2345         }
2346 }
2347
2348 void ComputeReportDeriv::reportResults(FitContext *fc, MxRList *, MxRList *result)
2349 {
2350         size_t numFree = fc->numParam;
2351
2352         if (fc->wanted & FF_COMPUTE_GRADIENT) {
2353                 if (!fc->grad.data()) {
2354                         Rf_warning("ComputeReportDeriv: Gradient requested but not available");
2355                 } else {
2356                         SEXP Rgradient;
2357                         Rf_protect(Rgradient = Rf_allocVector(REALSXP, numFree));
2358                         memcpy(REAL(Rgradient), fc->grad.data(), sizeof(double) * numFree);
2359                         result->add("gradient", Rgradient);
2360                 }
2361         }
2362         if (fc->wanted & FF_COMPUTE_HESSIAN) {
2363                 SEXP Rhessian;
2364                 Rf_protect(Rhessian = Rf_allocMatrix(REALSXP, numFree, numFree));
2365                 fc->copyDenseHess(REAL(Rhessian));
2366                 result->add("hessian", Rhessian);
2367         }
2368         if (fc->wanted & FF_COMPUTE_IHESSIAN) {
2369                 SEXP Rihessian;
2370                 Rf_protect(Rihessian = Rf_allocMatrix(REALSXP, numFree, numFree));
2371                 fc->copyDenseIHess(REAL(Rihessian));
2372                 result->add("ihessian", Rihessian);
2373         }
2374 }