Handle model.mat[R,C] labels in BA81 latent distribution
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2014 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <algorithm>
19
20 #include "omxFitFunction.h"
21 #include "omxExpectationBA81.h"
22 #include "omxOpenmpWrap.h"
23 #include "libifa-rpf.h"
24 #include "matrix.h"
25 #include "omxBuffer.h"
26
// Per-fit-function state for the BA81 (item factor analysis) fit.
// Caches the mapping from the current FreeVarGroup's free parameters
// to cells of the item parameter matrix and the latent mean/cov, so
// gradients and Hessian entries can be scattered into the right slots.
struct BA81FitState {
	//private:
	void copyEstimates(BA81Expect *estate);
	//public:

	// numFreeParam is used by both the item and latent parameter
	// maps (for Hessian offsets) and is initialized first to
	// allow either the item or latent parameter map to be built
	// first. We cache it here to avoid passing the FitContext
	// around everywhere.
	size_t numFreeParam;                 // the current var group's

	int haveLatentMap;                   // var group id the latent map was built for
	std::vector<int> latentMap;          // latent cell (means, then cov lower triangle) -> free param index
	bool freeLatents;                    // only to support old style direct latents, remove TODO
	int ElatentVersion;

	int haveItemMap;                     // var group id the item map was built for
	int itemDerivPadSize;                // maxParam + maxParam*(1+maxParam)/2
	bool freeItemParams;                 // true iff any item parameter is free
	std::vector<HessianBlock> hBlocks;   // one Hessian block per item (column of itemParam)
	std::vector<int> paramPerItem;       // itemParam->cols
	std::vector<const char *> paramFlavor;        // numFreeParam
	// gradient: itemParam->cols * itemDerivPadSize -> index of free parameter
	// Hessian:  itemParam->cols * itemDerivPadSize -> full Hessian offset in current varGroup
	std::vector<int> paramMap;
	std::vector<int> hbMap;              // itemParam->cols * itemDerivPadSize -> per-item HessianBlock offset
	std::vector<int> itemGradMap;        // index of gradient -> index of free parameter
	std::vector<int> itemParamFree;      // itemParam->cols * itemParam->rows : remove TODO

	// The following are only used to compute FF_COMPUTE_MAXABSCHANGE
	omxMatrix *itemParam;
	omxMatrix *latentMean;
	omxMatrix *latentCov;

	bool returnRowLikelihoods;

	BA81FitState();
	~BA81FitState();
};
67
68 // writes to upper triangle of full matrix
69 static void addSymOuterProd(const double weight, const double *vec, const int len, double *out)
70 {
71         for (int d1=0; d1 < len; ++d1) {
72                 for (int d2=0; d2 <= d1; ++d2) {
73                         out[d1 * len + d2] += weight * vec[d1] * vec[d2];
74                 }
75         }
76 }
77
78 BA81FitState::BA81FitState()
79 {
80         haveItemMap = FREEVARGROUP_INVALID;
81         haveLatentMap = FREEVARGROUP_INVALID;
82 }
83
84 void BA81FitState::copyEstimates(BA81Expect *estate)
85 {
86         omxCopyMatrix(itemParam, estate->itemParam);
87         if (estate->_latentMeanOut) omxCopyMatrix(latentMean, estate->_latentMeanOut);
88         if (estate->_latentCovOut)  omxCopyMatrix(latentCov, estate->_latentCovOut);
89 }
90
// Builds state->latentMap for the current FreeVarGroup: a lookup from
// latent-distribution cell to free-parameter index. Cells are laid out
// as the maxAbilities mean entries followed by the lower triangle of
// the covariance (triangleLoc1 indexing). Also imposes a positive
// lower bound on free variances and verifies that free covariance
// entries are constrained symmetrically. The result is cached per var
// group via state->haveLatentMap.
static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
{
	FreeVarGroup *fvg = fc->varGroup;
	BA81FitState *state = (BA81FitState *) oo->argStruct;
	std::vector<int> &latentMap = state->latentMap;
	BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
	int maxAbilities = estate->grp.maxAbilities;

	if (state->haveLatentMap == fc->varGroup->id[0]) return;  // map already current
	if (estate->verbose >= 1) mxLog("%s: rebuild latent parameter map for var group %d",
					oo->matrix->name, fc->varGroup->id[0]);

	state->freeLatents = false;

	// mean entries first, then the covariance lower triangle
	int numLatents = maxAbilities + triangleLoc1(maxAbilities);
	latentMap.assign(numLatents, -1);

	// NOTE(review): matrixNumber appears to be stored complemented
	// relative to omxFreeVarLocation::matrix (same ~ convention as in
	// buildItemParamMap below) -- confirm against freevar bookkeeping
	int meanNum = 0;
	if (estate->_latentMeanOut) meanNum = ~estate->_latentMeanOut->matrixNumber;
	int covNum = 0;
	if (estate->_latentCovOut) covNum = ~estate->_latentCovOut->matrixNumber;

	int numParam = int(fvg->vars.size());
	for (int px=0; px < numParam; px++) {
		omxFreeVar *fv = fvg->vars[px];
		for (size_t lx=0; lx < fv->locations.size(); lx++) {
			omxFreeVarLocation *loc = &fv->locations[lx];
			int matNum = loc->matrix;
			if (matNum == meanNum && estate->_latentMeanOut) {
				// The mean is a vector, so one of row/col is zero;
				// row + col is the ability index for either orientation
				// (handles model.mat[R,C] style labels).
				latentMap[loc->row + loc->col] = px;
				state->freeLatents = true;
			} else if (matNum == covNum && estate->_latentCovOut) {
				int a1 = loc->row;
				int a2 = loc->col;
				if (a1 < a2) std::swap(a1, a2);  // canonicalize to lower triangle
				int cell = maxAbilities + triangleLoc1(a1) + a2;
				if (latentMap[cell] == -1) {
					latentMap[cell] = px;

					if (a1 == a2 && fv->lbound == NEG_INF) {
						fv->lbound = BA81_MIN_VARIANCE;  // variance must be positive
						if (fc->est[px] < fv->lbound) {
							Rf_error("Starting value for variance %s is not positive", fv->name);
						}
					}
				} else if (latentMap[cell] != px) {
					// Both (a1,a2) and (a2,a1) must map to the same free
					// parameter, otherwise the cov could become asymmetric.
					// doesn't detect similar problems in multigroup constraints TODO
					Rf_error("Covariance matrix must be constrained to preserve symmetry");
				}
				state->freeLatents = true;
			}
		}
	}
	state->haveLatentMap = fc->varGroup->id[0];
}
146
// Builds the item parameter maps for the current FreeVarGroup:
//  - paramMap: (col * itemDerivPadSize + row) -> free parameter index;
//    second-derivative pad entries map to numFreeParam + offset into
//    the full (numFreeParam x numFreeParam) Hessian
//  - hbMap: same layout -> offset within the per-item HessianBlock
//  - itemGradMap, itemParamFree, paramPerItem, paramFlavor
// Also tightens free-parameter bounds using per-parameter info from
// the rpf (response probability function) library. Cached per var
// group via state->haveItemMap.
static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
{
	FreeVarGroup *fvg = fc->varGroup;
	BA81FitState *state = (BA81FitState *) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
	std::vector<const double*> &itemSpec = estate->grp.spec;

	if (state->haveItemMap == fc->varGroup->id[0]) return;  // map already current
	if (estate->verbose >= 1) mxLog("%s: rebuild item parameter map for var group %d",
					oo->matrix->name, fc->varGroup->id[0]);

	omxMatrix *itemParam = estate->itemParam;
	int size = itemParam->cols * state->itemDerivPadSize;
	state->freeItemParams = false;
	state->hBlocks.clear();
	state->hBlocks.resize(itemParam->cols);
	state->hbMap.assign(size, -1);     // matrix location to HessianBlock offset
	state->paramMap.assign(size, -1);  // matrix location to free param index
	state->itemParamFree.assign(itemParam->rows * itemParam->cols, FALSE);

	const size_t numFreeParam = state->numFreeParam;
	state->paramFlavor.assign(numFreeParam, NULL);

	// Count parameters per item (each item model may use a different
	// number of rows of the itemParam column).
	int totalParam = 0;
	state->paramPerItem.resize(itemParam->cols);
	for (int cx=0; cx < itemParam->cols; ++cx) {
		const double *spec = itemSpec[cx];
		const int id = spec[RPF_ISpecID];
		const int numParam = (*rpf_model[id].numParam)(spec);
		state->paramPerItem[cx] = numParam;
		totalParam += numParam;
	}
	state->itemGradMap.assign(totalParam, -1);

	for (size_t px=0; px < numFreeParam; px++) {
		omxFreeVar *fv = fvg->vars[px];
		for (size_t lx=0; lx < fv->locations.size(); lx++) {
			omxFreeVarLocation *loc = &fv->locations[lx];
			int matNum = ~loc->matrix;  // see matrixNumber complement convention
			if (matNum != itemParam->matrixNumber) continue;

			int at = loc->col * state->itemDerivPadSize + loc->row;
			state->paramMap[at] = px;
			// Since px is ascending and duplicates are skipped,
			// hBlocks[col].vars ends up sorted (required by the
			// std::lower_bound searches below).
			std::vector<int> &varMap = state->hBlocks[loc->col].vars;
			if (std::find(varMap.begin(), varMap.end(), px) == varMap.end()) {
				varMap.push_back(px);
			}
			state->itemParamFree[loc->col * itemParam->rows + loc->row] = TRUE;
			state->freeItemParams = true;

			// A free variable equated across item parameters must have
			// the same "flavor" everywhere; also pull in the rpf's
			// parameter bounds when the user left them unbounded.
			const double *spec = itemSpec[loc->col];
			int id = spec[RPF_ISpecID];
			const char *flavor;
			double upper, lower;
			(*rpf_model[id].paramInfo)(spec, loc->row, &flavor, &upper, &lower);
			if (state->paramFlavor[px] == 0) {
				state->paramFlavor[px] = flavor;
			} else if (strcmp(state->paramFlavor[px], flavor) != 0) {
				Rf_error("Cannot equate %s with %s[%d,%d]", fv->name,
				      itemParam->name, loc->row, loc->col);
			}
			if (fv->lbound == NEG_INF && std::isfinite(lower)) {
				fv->lbound = lower;
				if (fc->est[px] < fv->lbound) {
					Rf_error("Starting value %s %f less than lower bound %f",
					      fv->name, fc->est[px], lower);
				}
			}
			if (fv->ubound == INF && std::isfinite(upper)) {
				fv->ubound = upper;
				if (fc->est[px] > fv->ubound) {
					Rf_error("Starting value %s %f greater than upper bound %f",
					      fv->name, fc->est[px], upper);
				}
			}
		}
	}

	// Flatten the gradient map: concatenated per-item parameter slots
	// -> free parameter index (or -1 when the slot is fixed).
	int gradOffset = 0;
	for (int cx=0; cx < itemParam->cols; ++cx) {
		for (int rx=0; rx < state->paramPerItem[cx]; ++rx) {
			int at = cx * state->itemDerivPadSize + rx;
			int px = state->paramMap[at];
			if (px >= 0) state->itemGradMap[gradOffset] = px;
			++gradOffset;
		}
	}

	// Map each item's packed second-derivative entries (lower triangle,
	// stored after the numParam gradient slots in the pad) to both the
	// full Hessian and the per-item HessianBlock.
	for (int cx=0; cx < itemParam->cols; ++cx) {
		HessianBlock &hb = state->hBlocks[cx];
		int numParam = state->paramPerItem[cx];
		for (int p1=0; p1 < numParam; p1++) {
			const int outer_at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
			if (outer_at1 < 0) continue;
			const int outer_hb1 = std::lower_bound(hb.vars.begin(), hb.vars.end(), outer_at1) - hb.vars.begin();
			if (hb.vars[outer_hb1] != outer_at1) Rf_error("oops");

			for (int p2=0; p2 <= p1; p2++) {
				int at1 = outer_at1;
				int hb1 = outer_hb1;
				int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
				if (at2 < 0) continue;
				if (p1 == p2 && at1 != at2) Rf_error("oops");
				int hb2 = std::lower_bound(hb.vars.begin(), hb.vars.end(), at2) - hb.vars.begin();
				if (hb.vars[hb2] != at2) Rf_error("oops");

				// Canonicalize so at1 >= at2 (lower-triangle storage).
				if (at1 < at2) std::swap(at1, at2); // outer_at1 unaffected
				if (hb1 < hb2) std::swap(hb1, hb2); // outer_hb1 unaffected

				//                              mxLog("Item %d param(%d,%d) -> H[%d,%d] B[%d,%d]",
				//                                    cx, p1, p2, at1, at2, hb1, hb2);
				int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
				int hoffset = at1 * numFreeParam + at2;

				// Offsets >= numFreeParam in paramMap denote Hessian cells.
				state->paramMap[at] = numFreeParam + hoffset;
				state->hbMap[at] = hb1 * hb.vars.size() + hb2;
			}
		}
	}

	state->haveItemMap = fc->varGroup->id[0];
	//pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
}
270
// Computes the complete-data (EM) fit and, on request, the item
// parameter gradient and per-item Hessian blocks. `want` is a bitmask
// of FF_COMPUTE_* flags. Returns Scale * ll where Scale is the global
// log-likelihood scale factor, or NA_REAL on error.
static double
ba81ComputeEMFit(omxFitFunction* oo, int want, FitContext *fc)
{
	const double Scale = Global->llScale;
	BA81FitState *state = (BA81FitState*) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
	omxMatrix *itemParam = estate->itemParam;
	std::vector<const double*> &itemSpec = estate->grp.spec;
	std::vector<int> &cumItemOutcomes = estate->grp.cumItemOutcomes;
	ba81NormalQuad &quad = estate->getQuad();
	const int maxDims = quad.maxDims;
	const size_t numItems = itemSpec.size();
	const int do_fit = want & FF_COMPUTE_FIT;
	const int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN);

	if (do_deriv && !state->freeItemParams) {
		omxRaiseErrorf("%s: no free parameters", oo->matrix->name);
		return NA_REAL;
	}

	if (state->returnRowLikelihoods) {
		omxRaiseErrorf("%s: vector=TRUE not implemented", oo->matrix->name);
		return NA_REAL;
	}

	if (estate->verbose >= 3) mxLog("%s: complete data fit(want fit=%d deriv=%d)", oo->matrix->name, do_fit, do_deriv);

	// NOTE(review): the TRUE flag presumably requests log outcome
	// probabilities (the fit below sums weight * oProb) -- confirm
	// against ba81OutcomeProb.
	if (do_fit) estate->grp.ba81OutcomeProb(itemParam->data, TRUE);

	// One derivative pad per thread; reduced into thread 0's pad below.
	const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
	std::vector<double> thrDeriv(thrDerivSize * Global->numThreads);
	double *wherePrep = quad.wherePrep.data();

	double ll = 0;
#pragma omp parallel for num_threads(Global->numThreads) reduction(+:ll)
	for (size_t ix=0; ix < numItems; ix++) {
		const int thrId = omx_absolute_thread_num();
		const double *spec = itemSpec[ix];
		const int id = spec[RPF_ISpecID];
		const int dims = spec[RPF_ISpecDims];
		Eigen::VectorXd ptheta(dims);
		const rpf_dLL1_t dLL1 = rpf_model[id].dLL1;
		const int iOutcomes = estate->grp.itemOutcomes[ix];
		const int outcomeBase = cumItemOutcomes[ix] * quad.totalQuadPoints;
		const double *weight = estate->expected + outcomeBase;   // expected outcome counts
		const double *oProb = estate->grp.outcomeProb + outcomeBase;
		const double *iparam = omxMatrixColumn(itemParam, ix);
		double *myDeriv = thrDeriv.data() + thrDerivSize * thrId + ix * state->itemDerivPadSize;

		for (int qx=0; qx < quad.totalQuadPoints; qx++) {
			if (do_fit) {
				for (int ox=0; ox < iOutcomes; ox++) {
					ll += weight[ox] * oProb[ox];
				}
			}
			if (do_deriv) {
				double *where = wherePrep + qx * maxDims;
				// Clamp to the last primary dimension for items with
				// fewer dims than the quadrature grid provides.
				for (int dx=0; dx < dims; dx++) {
					ptheta[dx] = where[std::min(dx, maxDims-1)];
				}

				(*dLL1)(spec, iparam, ptheta.data(), weight, myDeriv);
			}
			weight += iOutcomes;
			oProb += iOutcomes;
		}
	}

	// NOTE(review): excluded is never incremented anywhere below, so
	// the two reports at the end are currently dead code.
	size_t excluded = 0;

	if (do_deriv) {
		double *deriv0 = thrDeriv.data();

		// Reduce the per-thread derivative pads into thread 0's pad.
		int perThread = itemParam->cols * state->itemDerivPadSize;
		for (int th=1; th < Global->numThreads; th++) {
			double *thrD = thrDeriv.data() + th * perThread;
			for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
		}

		int numFreeParams = int(state->numFreeParam);
		int ox=-1;  // runs over every item's pad entries across the loop below
		for (size_t ix=0; ix < numItems; ix++) {
			const double *spec = itemSpec[ix];
			int id = spec[RPF_ISpecID];
			double *iparam = omxMatrixColumn(itemParam, ix);
			double *pad = deriv0 + ix * state->itemDerivPadSize;
			(*rpf_model[id].dLL2)(spec, iparam, pad);  // finish second derivatives

			HessianBlock *hb = state->hBlocks[ix].clone();
			hb->mat.triangularView<Eigen::Upper>().setZero();

			for (int dx=0; dx < state->itemDerivPadSize; ++dx) {
				int to = state->paramMap[++ox];
				if (to == -1) continue;  // fixed parameter slot

				// Need to check because this can happen if
				// lbounds/ubounds are not set appropriately.
				// (Disabled diagnostic; enable by changing 0 to 1.)
				if (0 && !std::isfinite(deriv0[ox])) {
					int item = ox / itemParam->rows;
					mxLog("item parameters:\n");
					const double *spec = itemSpec[item];
					int id = spec[RPF_ISpecID];
					int numParam = (*rpf_model[id].numParam)(spec);
					double *iparam = omxMatrixColumn(itemParam, item);
					pda(iparam, numParam, 1);
					// Perhaps bounds can be pulled in from librpf? TODO
					Rf_error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
						 ox, item, deriv0[ox]);
				}

				// paramMap entries below numFreeParams are gradient
				// slots; larger values encode Hessian cells.
				if (to < numFreeParams) {
					if (want & FF_COMPUTE_GRADIENT) {
						fc->grad(to) -= Scale * deriv0[ox];
					}
				} else {
					if (want & (FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN)) {
						int Hto = state->hbMap[ox];
						if (Hto >= 0) hb->mat.data()[Hto] -= Scale * deriv0[ox];
					}
				}
			}
			fc->queue(hb);  // fc takes ownership of the cloned block
		}
	}

	if (excluded && estate->verbose >= 1) {
		mxLog("%s: Hessian not positive definite for %d/%d items",
		      oo->matrix->name, (int) excluded, (int) numItems);
	}
	if (excluded == numItems) {
		omxRaiseErrorf("Hessian not positive definite for %d/%d items",
			       (int) excluded, (int) numItems);
	}

	return Scale * ll;
}
407
// Intentionally a no-op: BA81 rebuilds its item/latent parameter maps
// lazily from the FitContext's var group (see buildItemParamMap and
// buildLatentParamMap), so the group passed here is not cached.
void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
{}
410
411 static void sandwich(omxFitFunction *oo, FitContext *fc)
412 {
413         const double abScale = fabs(Global->llScale);
414         omxExpectation *expectation = oo->expectation;
415         BA81FitState *state = (BA81FitState*) oo->argStruct;
416         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
417         if (estate->verbose >= 1) mxLog("%s: sandwich", oo->matrix->name);
418
419         estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE);
420
421         const int numThreads = Global->numThreads;
422         const int numUnique = estate->getNumUnique();
423         ba81NormalQuad &quad = estate->getQuad();
424         const int numSpecific = quad.numSpecific;
425         const int maxDims = quad.maxDims;
426         std::vector<int> &rowMap = estate->grp.rowMap;
427         double *rowWeight = estate->grp.rowWeight;
428         std::vector<bool> &rowSkip = estate->grp.rowSkip;
429         const int totalQuadPoints = quad.totalQuadPoints;
430         omxMatrix *itemParam = estate->itemParam;
431         omxBuffer<double> patternLik(numUnique);
432
433         std::vector<const double*> &itemSpec = estate->grp.spec;
434         const int totalOutcomes = estate->totalOutcomes();
435         const int numItems = estate->grp.numItems();
436         const size_t numParam = fc->varGroup->vars.size();
437         const double *wherePrep = quad.wherePrep.data();
438         std::vector<double> thrBreadG(numThreads * numParam * numParam);
439         std::vector<double> thrBreadH(numThreads * numParam * numParam);
440         std::vector<double> thrMeat(numThreads * numParam * numParam);
441
442         if (numSpecific == 0) {
443                 omxBuffer<double> thrLxk(totalQuadPoints * numThreads);
444
445 #pragma omp parallel for num_threads(numThreads)
446                 for (int px=0; px < numUnique; px++) {
447                         if (rowSkip[px]) continue;
448                         int thrId = omx_absolute_thread_num();
449                         double *lxk = thrLxk.data() + thrId * totalQuadPoints;
450                         omxBuffer<double> itemDeriv(state->itemDerivPadSize);
451                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
452                         double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
453                         double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
454                         double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
455                         std::vector<double> patGrad(numParam);
456
457                         estate->grp.ba81LikelihoodSlow2(px, lxk);
458
459                         // If patternLik is already valid, maybe could avoid this loop TODO
460                         double patternLik1 = 0;
461                         for (int qx=0; qx < totalQuadPoints; qx++) {
462                                 patternLik1 += lxk[qx];
463                         }
464                         patternLik[px] = patternLik1;
465
466                         // if (!validPatternLik(state, patternLik1))  complain
467
468                         double weight = 1 / patternLik[px];
469                         for (int qx=0; qx < totalQuadPoints; qx++) {
470                                 double tmp = lxk[qx] * weight;
471                                 double sqrtTmp = sqrt(tmp);
472
473                                 std::vector<double> gradBuf(numParam);
474                                 int gradOffset = 0;
475
476                                 for (int ix=0; ix < numItems; ++ix) {
477                                         if (ix) gradOffset += state->paramPerItem[ix-1];
478                                         int pick = estate->grp.dataColumns[ix][rowMap[px]];
479                                         if (pick == NA_INTEGER) continue;
480                                         pick -= 1;
481
482                                         const int iOutcomes = estate->itemOutcomes(ix);
483                                         OMXZERO(expected.data(), iOutcomes);
484                                         expected[pick] = 1;
485                                         const double *spec = itemSpec[ix];
486                                         double *iparam = omxMatrixColumn(itemParam, ix);
487                                         const int id = spec[RPF_ISpecID];
488                                         OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
489                                         (*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
490                                                               expected.data(), itemDeriv.data());
491                                         (*rpf_model[id].dLL2)(spec, iparam, itemDeriv.data());
492
493                                         for (int par = 0; par < state->paramPerItem[ix]; ++par) {
494                                                 int to = state->itemGradMap[gradOffset + par];
495                                                 if (to >= 0) {
496                                                         gradBuf[to] -= itemDeriv[par] * sqrtTmp;
497                                                         patGrad[to] -= itemDeriv[par] * tmp;
498                                                 }
499                                         }
500                                         int derivBase = ix * state->itemDerivPadSize;
501                                         for (int ox=0; ox < state->itemDerivPadSize; ox++) {
502                                                 int to = state->paramMap[derivBase + ox];
503                                                 if (to >= int(numParam)) {
504                                                         int Hto = to - numParam;
505                                                         breadH[Hto] += abScale * itemDeriv[ox] * tmp * rowWeight[px];
506                                                 }
507                                         }
508                                 }
509                                 addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG);
510                         }
511                         addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat);
512                 }
513
514         } else {
515                 const int totalPrimaryPoints = quad.totalPrimaryPoints;
516                 const int specificPoints = quad.quadGridSize;
517                 omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
518                 omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
519                 omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);
520
521 #pragma omp parallel for num_threads(numThreads)
522                 for (int px=0; px < numUnique; px++) {
523                         if (rowSkip[px]) continue;
524                         int thrId = omx_absolute_thread_num();
525                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
526                         omxBuffer<double> itemDeriv(state->itemDerivPadSize);
527                         double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
528                         double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
529                         double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
530                         std::vector<double> patGrad(numParam);
531                         double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
532                         double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
533                         double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
534                         estate->grp.cai2010EiEis(px, lxk, Eis, Ei);
535
536                         // If patternLik is already valid, maybe could avoid this loop TODO
537                         double patternLik1 = 0;
538                         for (int qx=0; qx < totalPrimaryPoints; ++qx) {
539                                 patternLik1 += Ei[qx];
540                         }
541                         patternLik[px] = patternLik1;
542
543                         for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) {
544                                 for (int sgroup=0; sgroup < numSpecific; ++sgroup) {
545                                         Eis[qloc] = Ei[qx] / Eis[qloc];
546                                         ++qloc;
547                                 }
548                         }
549
550                         // WARNING: I didn't work out the math. I just coded this the way
551                         // it seems to make sense.
552                         for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
553                                 for (int sx=0; sx < specificPoints; sx++) {
554                                         for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
555                                                 std::vector<double> gradBuf(numParam);
556                                                 int gradOffset = 0;
557                                                 double lxk1 = lxk[qloc + Sgroup];
558                                                 double Eis1 = Eis[eisloc + Sgroup];
559                                                 double tmp = Eis1 * lxk1 / patternLik1;
560                                                 double sqrtTmp = sqrt(tmp);
561                                                 for (int ix=0; ix < numItems; ++ix) {
562                                                         if (ix) gradOffset += state->paramPerItem[ix-1];
563                                                         if (estate->grp.Sgroup[ix] != Sgroup) continue;
564                                                         int pick = estate->grp.dataColumns[ix][rowMap[px]];
565                                                         if (pick == NA_INTEGER) continue;
566                                                         OMXZERO(expected.data(), estate->itemOutcomes(ix));
567                                                         expected[pick-1] = 1;
568                                                         const double *spec = itemSpec[ix];
569                                                         double *iparam = omxMatrixColumn(itemParam, ix);
570                                                         const int id = spec[RPF_ISpecID];
571                                                         const int dims = spec[RPF_ISpecDims];
572                                                         OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
573                                                         const double *where = wherePrep + qx * maxDims;
574                                                         Eigen::VectorXd ptheta(dims);
575                                                         for (int dx=0; dx < dims; dx++) {
576                                                                 ptheta[dx] = where[std::min(dx, maxDims-1)];
577                                                         }
578                                                         (*rpf_model[id].dLL1)(spec, iparam, ptheta.data(),
579                                                                               expected.data(), itemDeriv.data());
580                                                         (*rpf_model[id].dLL2)(spec, iparam, itemDeriv.data());
581
582                                                         for (int par = 0; par < state->paramPerItem[ix]; ++par) {
583                                                                 int to = state->itemGradMap[gradOffset + par];
584                                                                 if (to >= 0) {
585                                                                         gradBuf[to] -= itemDeriv[par] * sqrtTmp;
586                                                                         patGrad[to] -= itemDeriv[par] * tmp;
587                                                                 }
588                                                         }
589                                                         int derivBase = ix * state->itemDerivPadSize;
590                                                         for (int ox=0; ox < state->itemDerivPadSize; ox++) {
591                                                                 int to = state->paramMap[derivBase + ox];
592                                                                 if (to >= int(numParam)) {
593                                                                         int Hto = to - numParam;
594                                                                         breadH[Hto] += (abScale * itemDeriv[ox] *
595                                                                                         tmp * rowWeight[px]);
596                                                                 }
597                                                         }
598                                                 }
599                                                 addSymOuterProd(abScale * rowWeight[px], gradBuf.data(), numParam, breadG);
600                                         }
601                                         qloc += numSpecific;
602                                         ++qx;
603                                 }
604                         }
605                         addSymOuterProd(abScale * rowWeight[px], patGrad.data(), numParam, meat);
606                 }
607         }
608
609         // only need upper triangle TODO
610         for (int tx=1; tx < numThreads; ++tx) {
611                 double *th = thrBreadG.data() + tx * numParam * numParam;
612                 for (size_t en=0; en < numParam * numParam; ++en) {
613                         thrBreadG[en] += th[en];
614                 }
615         }
616         for (int tx=1; tx < numThreads; ++tx) {
617                 double *th = thrBreadH.data() + tx * numParam * numParam;
618                 for (size_t en=0; en < numParam * numParam; ++en) {
619                         thrBreadH[en] += th[en];
620                 }
621         }
622         for (int tx=1; tx < numThreads; ++tx) {
623                 double *th = thrMeat.data() + tx * numParam * numParam;
624                 for (size_t en=0; en < numParam * numParam; ++en) {
625                         thrMeat[en] += th[en];
626                 }
627         }
628         //pda(thrBreadG.data(), numParam, numParam);
629         //pda(thrBreadH.data(), numParam, numParam);
630         //pda(thrMeat.data(), numParam, numParam);
631         if (fc->infoA) {
632                 for (size_t d1=0; d1 < numParam; ++d1) {
633                         for (size_t d2=0; d2 < numParam; ++d2) {
634                                 int cell = d1 * numParam + d2;
635                                 fc->infoA[cell] += thrBreadH[cell] - thrBreadG[cell] + thrMeat[cell];
636                         }
637                 }
638         }
639         if (fc->infoB) {
640                 for (size_t d1=0; d1 < numParam; ++d1) {
641                         for (size_t d2=0; d2 < numParam; ++d2) {
642                                 int cell = d1 * numParam + d2;
643                                 fc->infoB[cell] += thrMeat[cell];
644                         }
645                 }
646         }
647 }
648
649 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc) //remove? TODO
650 {
651         BA81FitState *state = (BA81FitState*) oo->argStruct;
652         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
653         std::vector<int> &latentMap = state->latentMap;
654         ba81NormalQuad &quad = estate->getQuad();
655         int maxAbilities = quad.maxAbilities;
656         omxMatrix *estMean = estate->estLatentMean;
657         omxMatrix *estCov = estate->estLatentCov;
658
659         for (int a1 = 0; a1 < maxAbilities; ++a1) {
660                 if (latentMap[a1] >= 0) {
661                         int to = latentMap[a1];
662                         fc->est[to] = omxVectorElement(estMean, a1);
663                 }
664
665                 for (int a2 = 0; a2 <= a1; ++a2) {
666                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
667                         if (to < 0) continue;
668                         fc->est[to] = omxMatrixElement(estCov, a1, a2);
669                 }
670         }
671
672         if (estate->verbose >= 1) {
673                 mxLog("%s: set latent parameters for version %d",
674                       oo->matrix->name, estate->ElatentVersion);
675         }
676 }
677
678 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, double piece,
679                            double *derivCoef, double *derivOut)
680 {
681         if (!state->freeLatents) return;
682         ba81NormalQuad &quad = estate->getQuad();
683         const int maxAbilities = quad.maxAbilities;
684         const int pmax = quad.numSpecific? quad.maxDims - 1 : quad.maxDims;
685
686         int cx = 0;
687         for (int d1=0; d1 < pmax; ++d1) {
688                 double amt1 = piece * derivCoef[d1];
689                 derivOut[d1] += amt1;
690                 for (int d2=0; d2 <= d1; ++d2) {
691                         int to = maxAbilities + cx;
692                         double amt2 = piece * derivCoef[pmax + cx];
693                         derivOut[to] += amt2;
694                         ++cx;
695                 }
696         }
697 }
698
699 static void mapLatentDerivS(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
700                             double *derivCoef, double *derivOut)
701 {
702         if (!state->freeLatents) return;
703         ba81NormalQuad &quad = estate->getQuad();
704         int maxAbilities = quad.maxAbilities;
705         int maxDims = quad.maxDims;
706         int pmax = maxDims;
707         if (quad.numSpecific) pmax -= 1;
708
709         int sdim = pmax + sgroup;
710         double amt3 = piece * derivCoef[0];
711         derivOut[sdim] += amt3;
712
713         double amt4 = piece * derivCoef[1];
714         int to = maxAbilities + triangleLoc0(sdim);
715         derivOut[to] += amt4;
716 }
717
// Fill derivCoef with the derivative coefficients of the latent density
// at quadrature point 'where': the first pDims entries correspond to the
// primary latent means, the next triangleLoc1(pDims) entries to the packed
// lower triangle of the primary latent covariance. 'icov' must hold the
// full (both triangles) inverse of the primary covariance block.
static void calcDerivCoef(FitContext *fc, BA81FitState *state, BA81Expect *estate, double *icov,
			  const double *where, double *derivCoef)
{
	ba81NormalQuad &quad = estate->getQuad();
	Eigen::VectorXd mean;
	Eigen::MatrixXd cov;
	estate->getLatentDistribution(fc, mean, cov);
	const int pDims = quad.numSpecific? quad.maxDims - 1 : quad.maxDims;  // primary dims only
	const char R='R';
	const char L='L';
	const char U='U';
	const double alpha = 1;
	const double beta = 0;
	const int one = 1;

	std::vector<double> whereDiff(pDims);
	std::vector<double> whereGram(triangleLoc1(pDims));
	for (int d1=0; d1 < pDims; ++d1) {
		whereDiff[d1] = where[d1] - mean[d1];
	}
	// whereGram = packed outer product whereDiff * whereDiff'
	gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());

	// mean coefficients: derivCoef[0..pDims) = icov * (where - mean)
	F77_CALL(dsymv)(&U, &pDims, &alpha, icov, &pDims, whereDiff.data(), &one,
			&beta, derivCoef, &one);

	std::vector<double> covGrad1(pDims * pDims);
	std::vector<double> covGrad2(pDims * pDims);

	// lower triangle of (cov - whereDiff * whereDiff')
	int cx=0;
	for (int d1=0; d1 < pDims; ++d1) {
		for (int d2=0; d2 <= d1; ++d2) {
			covGrad1[d2 * pDims + d1] = cov(d2,d1) - whereGram[cx];
			++cx;
		}
	}

	// two symmetric multiplies: covGrad1 <- icov * (cov - gram) * icov
	F77_CALL(dsymm)(&R, &L, &pDims, &pDims, &alpha, covGrad1.data(), &pDims, icov,
			&pDims, &beta, covGrad2.data(), &pDims);
	F77_CALL(dsymm)(&R, &L, &pDims, &pDims, &alpha, icov, &pDims, covGrad2.data(),
			&pDims, &beta, covGrad1.data(), &pDims);

	// halve the diagonal; presumably because off-diagonal cells appear twice
	// in the symmetric parameterization -- TODO confirm against derivation
	for (int d1=0; d1 < pDims; ++d1) {
		covGrad1[d1 * pDims + d1] /= 2.0;
	}

	// pack the negated lower triangle after the mean coefficients
	cx = pDims;
	for (int d1=0; d1 < pDims; ++d1) {
		int cell = d1 * pDims;
		for (int d2=0; d2 <= d1; ++d2) {
			derivCoef[cx] = -covGrad1[cell + d2];
			++cx;
		}
	}
}
772
773 static void calcDerivCoef1(FitContext *fc, BA81FitState *state, BA81Expect *estate,
774                            const double *where, int sgroup, double *derivCoef)
775 {
776         Eigen::VectorXd mean;
777         Eigen::MatrixXd cov;
778         estate->getLatentDistribution(fc, mean, cov);
779         ba81NormalQuad &quad = estate->getQuad();
780         const int maxDims = quad.maxDims;
781         const int specific = maxDims - 1 + sgroup;
782         double svar = cov(specific, specific);
783         double whereDiff = where[maxDims-1] - mean[specific];
784         derivCoef[0] = whereDiff / svar;
785         derivCoef[1] = -(svar - whereDiff * whereDiff) / (2 * svar * svar);
786 }
787
788 static void gradCov_finish_1pat(const double weight, const double rowWeight, const size_t numItems,
789                             const int numLatents, const size_t numParam,
790                             BA81FitState *state, BA81Expect *estate, omxMatrix *itemParam,
791                             std::vector<double> &deriv0, std::vector<double> &latentGrad,
792                             const double Scale,
793                             std::vector<double> &patGrad,
794                             double *grad, double *meat)
795 {
796         int gradOffset = 0;
797         for (size_t ix=0; ix < numItems; ++ix) {
798                 const double *spec = estate->itemSpec(ix);
799                 double *iparam = omxMatrixColumn(itemParam, ix);
800                 const int id = spec[RPF_ISpecID];
801                 double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
802                 (*rpf_model[id].dLL2)(spec, iparam, myDeriv);
803
804                 for (int par = 0; par < state->paramPerItem[ix]; ++par) {
805                         int to = state->itemGradMap[gradOffset];
806                         if (to >= 0) patGrad[to] -= weight * myDeriv[par];
807                         ++gradOffset;
808                 }
809         }
810
811         for (int lx=0; lx < numLatents; ++lx) {
812                 int to = state->latentMap[lx];
813                 if (to >= 0) patGrad[to] += weight * latentGrad[lx];
814         }
815         for (size_t par=0; par < numParam; ++par) {
816                 grad[par] += patGrad[par] * Scale * rowWeight;
817         }
818         addSymOuterProd(fabs(Scale) * rowWeight, patGrad.data(), numParam, meat);
819 }
820
// Compute the analytic gradient and (when requested) the cross-product
// ("meat") approximation of the information matrix for observed data.
// Work is parallelized per response pattern with per-thread accumulators
// that are reduced at the end. The numSpecific==0 branch handles the
// ordinary quadrature; the else branch handles the two-tier layout
// (primary points x specific points x specific groups).
static void gradCov(omxFitFunction *oo, FitContext *fc)
{
	const double Scale = Global->llScale;
	omxExpectation *expectation = oo->expectation;
	BA81FitState *state = (BA81FitState*) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) expectation->argStruct;
	if (estate->verbose >= 1) mxLog("%s: cross product approximation", oo->matrix->name);

	estate->grp.ba81OutcomeProb(estate->itemParam->data, FALSE);

	const int numThreads = Global->numThreads;
	const int numUnique = estate->getNumUnique();
	ba81NormalQuad &quad = estate->getQuad();
	const int numSpecific = quad.numSpecific;
	const int maxDims = quad.maxDims;
	const int pDims = numSpecific? maxDims-1 : maxDims;
	const int maxAbilities = quad.maxAbilities;
	Eigen::MatrixXd icovMat(pDims, pDims);
	{
		// invert the primary block of the latent covariance; calcDerivCoef
		// needs the full inverse
		Eigen::VectorXd mean;
		Eigen::MatrixXd srcMat;
		estate->getLatentDistribution(fc, mean, srcMat);
		icovMat = srcMat.topLeftCorner(pDims, pDims);
		Matrix tmp(icovMat.data(), pDims, pDims);
		int info = InvertSymmetricPosDef(tmp, 'U');
		if (info) {
			omxRaiseErrorf("%s: latent covariance matrix is not positive definite", oo->matrix->name);
			return;
		}
		// mirror the upper triangle into the lower triangle
		icovMat.triangularView<Eigen::Lower>() = icovMat.transpose().triangularView<Eigen::Lower>();
	}
	std::vector<int> &rowMap = estate->grp.rowMap;
	double *rowWeight = estate->grp.rowWeight;
	std::vector<bool> &rowSkip = estate->grp.rowSkip;
	const int totalQuadPoints = quad.totalQuadPoints;
	omxMatrix *itemParam = estate->itemParam;
	omxBuffer<double> patternLik(numUnique);

	const int priDerivCoef = pDims + triangleLoc1(pDims);  // primary coefficients per point
	const int numLatents = maxAbilities + triangleLoc1(maxAbilities);
	const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
	const int totalOutcomes = estate->totalOutcomes();
	const int numItems = state->freeItemParams? estate->numItems() : 0;  // 0 disables item loops
	const size_t numParam = fc->varGroup->vars.size();
	std::vector<double> thrGrad(numThreads * numParam);
	std::vector<double> thrMeat(numThreads * numParam * numParam);
	const double *wherePrep = quad.wherePrep.data();

	if (numSpecific == 0) {
		omxBuffer<double> thrLxk(totalQuadPoints * numThreads);
		omxBuffer<double> derivCoef(totalQuadPoints * priDerivCoef);

		if (state->freeLatents) {
			// precompute latent-distribution derivative coefficients per point
#pragma omp parallel for num_threads(numThreads)
			for (int qx=0; qx < totalQuadPoints; qx++) {
				const double *where = wherePrep + qx * maxDims;
				calcDerivCoef(fc, state, estate, icovMat.data(), where,
					      derivCoef.data() + qx * priDerivCoef);
			}
		}

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			int thrId = omx_absolute_thread_num();
			double *lxk = thrLxk.data() + thrId * totalQuadPoints;
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			std::vector<double> deriv0(thrDerivSize);
			std::vector<double> latentGrad(numLatents);
			std::vector<double> patGrad(numParam);
			double *grad = thrGrad.data() + thrId * numParam;
			double *meat = thrMeat.data() + thrId * numParam * numParam;
			estate->grp.ba81LikelihoodSlow2(px, lxk);

			// If patternLik is already valid, maybe could avoid this loop TODO
			double patternLik1 = 0;
			for (int qx=0; qx < totalQuadPoints; qx++) {
				patternLik1 += lxk[qx];
			}
			patternLik[px] = patternLik1;

			// if (!validPatternLik(state, patternLik1))  complain, TODO

			// accumulate per-point contributions: the latent gradient and the
			// first item derivatives (dLL1); dLL2 is applied afterwards by
			// gradCov_finish_1pat
			for (int qx=0; qx < totalQuadPoints; qx++) {
				double tmp = lxk[qx];
				mapLatentDeriv(state, estate, tmp, derivCoef.data() + qx * priDerivCoef,
					       latentGrad.data());

				for (int ix=0; ix < numItems; ++ix) {
					int pick = estate->grp.dataColumns[ix][rowMap[px]];
					if (pick == NA_INTEGER) continue;
					OMXZERO(expected.data(), estate->itemOutcomes(ix));
					expected[pick-1] = tmp;
					const double *spec = estate->itemSpec(ix);
					double *iparam = omxMatrixColumn(itemParam, ix);
					const int id = spec[RPF_ISpecID];
					double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
					(*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
							      expected.data(), myDeriv);
				}
			}

			gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam,
					state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);
		}
	} else {
		const int totalPrimaryPoints = quad.totalPrimaryPoints;
		const int specificPoints = quad.quadGridSize;
		omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
		omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
		omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);
		// priDerivCoef primary terms + a (mean, variance) pair per specific group
		const int derivPerPoint = priDerivCoef + 2 * numSpecific;
		omxBuffer<double> derivCoef(totalQuadPoints * derivPerPoint);

		if (state->freeLatents) {
			// precompute primary and specific derivative coefficients per point
#pragma omp parallel for num_threads(numThreads)
			for (int qx=0; qx < totalQuadPoints; qx++) {
				const double *where = wherePrep + qx * maxDims;
				calcDerivCoef(fc, state, estate, icovMat.data(), where,
					      derivCoef.data() + qx * derivPerPoint);
				for (int Sgroup=0; Sgroup < numSpecific; ++Sgroup) {
					calcDerivCoef1(fc, state, estate, where, Sgroup,
						       derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup);
				}
			}
		}

#pragma omp parallel for num_threads(numThreads)
		for (int px=0; px < numUnique; px++) {
			if (rowSkip[px]) continue;
			int thrId = omx_absolute_thread_num();
			double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
			double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
			double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
			omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
			std::vector<double> deriv0(thrDerivSize);
			std::vector<double> latentGrad(numLatents);
			std::vector<double> patGrad(numParam);
			double *grad = thrGrad.data() + thrId * numParam;
			double *meat = thrMeat.data() + thrId * numParam * numParam;
			estate->grp.cai2010EiEis(px, lxk, Eis, Ei);

			// convert each Eis entry to the ratio Ei/Eis
			for (int qx=0, qloc = 0; qx < totalPrimaryPoints; qx++) {
				for (int sgroup=0; sgroup < numSpecific; ++sgroup) {
					Eis[qloc] = Ei[qx] / Eis[qloc];
					++qloc;
				}
			}

			// three cursors: qx walks the full grid, eisloc the
			// primary-point x group table, qloc the lxk table
			for (int qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
				for (int sx=0; sx < specificPoints; sx++) {
					mapLatentDeriv(state, estate, Eis[eisloc] * lxk[qloc],
						       derivCoef.data() + qx * derivPerPoint,
						       latentGrad.data());

					for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
						double lxk1 = lxk[qloc];
						double Eis1 = Eis[eisloc + Sgroup];
						double tmp = Eis1 * lxk1;
						mapLatentDerivS(state, estate, Sgroup, tmp,
								derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup,
								latentGrad.data());

						for (int ix=0; ix < numItems; ++ix) {
							if (estate->grp.Sgroup[ix] != Sgroup) continue;
							int pick = estate->grp.dataColumns[ix][rowMap[px]];
							if (pick == NA_INTEGER) continue;
							OMXZERO(expected.data(), estate->itemOutcomes(ix));
							expected[pick-1] = tmp;
							const double *spec = estate->itemSpec(ix);
							double *iparam = omxMatrixColumn(itemParam, ix);
							const int id = spec[RPF_ISpecID];
							const int dims = spec[RPF_ISpecDims];
							double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
							const double *where = wherePrep + qx * maxDims;
							// dimensions at or beyond maxDims-1 all read the
							// specific (last) coordinate of 'where'
							Eigen::VectorXd ptheta(dims);
							for (int dx=0; dx < dims; dx++) {
								ptheta[dx] = where[std::min(dx, maxDims-1)];
							}
							(*rpf_model[id].dLL1)(spec, iparam, ptheta.data(),
									      expected.data(), myDeriv);
						}
						++qloc;
					}
					++qx;
				}
			}

			// If patternLik is already valid, maybe could avoid this loop TODO
			double patternLik1 = 0;
			for (int qx=0; qx < totalPrimaryPoints; ++qx) {
				patternLik1 += Ei[qx];
			}
			patternLik[px] = patternLik1;

			gradCov_finish_1pat(1 / patternLik1, rowWeight[px], numItems, numLatents, numParam,
					state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);
		}
	}

	// reduce the per-thread accumulators into thread 0's slice
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrGrad.data() + tx * numParam;
		for (size_t en=0; en < numParam; ++en) {
			thrGrad[en] += th[en];
		}
	}
	for (int tx=1; tx < numThreads; ++tx) {
		double *th = thrMeat.data() + tx * numParam * numParam;
		for (size_t en=0; en < numParam * numParam; ++en) {
			thrMeat[en] += th[en];
		}
	}
	for (size_t d1=0; d1 < numParam; ++d1) {
		fc->grad(d1) += thrGrad[d1];
	}
	if (fc->infoB) {
		for (size_t d1=0; d1 < numParam; ++d1) {
			for (size_t d2=0; d2 < numParam; ++d2) {
				int cell = d1 * numParam + d2;
				fc->infoB[cell] += thrMeat[cell];
			}
		}
	}
}
1045
// Dispatch fit-related computations (fit value, gradient, information,
// starting values, max-abs-change) according to the 'want' flags and the
// expectation's mode: EXPECTATION_AUGMENTED routes through the EM
// complete-data fit, EXPECTATION_OBSERVED through the observed-data
// likelihood.
static void
ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
{
	BA81FitState *state = (BA81FitState*) oo->argStruct;
	BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
	// cache the free-parameter count for the item/latent parameter maps
	if (fc) state->numFreeParam = fc->varGroup->vars.size();

	if (want & FF_COMPUTE_INITIAL_FIT) return;

	if (estate->type == EXPECTATION_AUGMENTED) {
		buildItemParamMap(oo, fc);
		if (state->freeItemParams) estate->expectedUsed = true;

		if (want & FF_COMPUTE_PARAMFLAVOR) {
			// report each free parameter's flavor (if set) to the compute layer
			for (size_t px=0; px < state->numFreeParam; ++px) {
				if (state->paramFlavor[px] == NULL) continue;
				fc->flavor[px] = state->paramFlavor[px];
			}
			return;
		}

		if (want & FF_COMPUTE_PREOPTIMIZE) {
			omxExpectationCompute(oo->expectation, NULL);
			return;
		}

		if (want & FF_COMPUTE_INFO) {
			buildLatentParamMap(oo, fc);
			if (!state->freeItemParams) {
				omxRaiseErrorf("%s: no free parameters", oo->matrix->name);
				return;
			}
			ba81SetupQuadrature(oo->expectation);

			// only the Hessian-based information method is implemented here
			if (fc->infoMethod == INFO_METHOD_HESSIAN) {
				ba81ComputeEMFit(oo, FF_COMPUTE_HESSIAN, fc);
			} else {
				omxRaiseErrorf("Information matrix approximation method %d is not available",
					       fc->infoMethod);
				return;
			}
			return;
		}

		double got = ba81ComputeEMFit(oo, want, fc);
		oo->matrix->data[0] = got;
		return;
	} else if (estate->type == EXPECTATION_OBSERVED) {

		if (want == FF_COMPUTE_STARTING) {
			buildLatentParamMap(oo, fc);
			if (state->freeLatents) setLatentStartingValues(oo, fc);
			return;
		}

		if (want & (FF_COMPUTE_INFO | FF_COMPUTE_GRADIENT)) {
			buildLatentParamMap(oo, fc); // only to check state->freeLatents
			buildItemParamMap(oo, fc);
			if (!state->freeItemParams && !state->freeLatents) {
				omxRaiseErrorf("%s: no free parameters", oo->matrix->name);
				return;
			}
			ba81SetupQuadrature(oo->expectation);

			// gradCov serves the gradient and the meat-style information
			// matrix; the sandwich path covers the remaining item-only case
			if (want & FF_COMPUTE_GRADIENT ||
			    (want & FF_COMPUTE_INFO && fc->infoMethod == INFO_METHOD_MEAT)) {
				gradCov(oo, fc);
			} else {
				if (state->freeLatents) {
					omxRaiseErrorf("Information matrix approximation method %d is not available",
						       fc->infoMethod);
					return;
				}
				if (!state->freeItemParams) {
					omxRaiseErrorf("%s: no free parameters", oo->matrix->name);
					return;
				}
				sandwich(oo, fc);
			}
		}
		if (want & (FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN)) {
			omxRaiseErrorf("%s: Hessian is not available for observed data", oo->matrix->name);
		}

		if (want & FF_COMPUTE_MAXABSCHANGE) {
			// largest absolute change in any estimate since the previous
			// call; copyEstimates refreshes the snapshot for next time
			double mac = std::max(omxMaxAbsDiff(state->itemParam, estate->itemParam),
					      omxMaxAbsDiff(state->latentMean, estate->_latentMeanOut));
			fc->mac = std::max(mac, omxMaxAbsDiff(state->latentCov, estate->_latentCovOut));
			state->copyEstimates(estate);
		}

		if (want & FF_COMPUTE_FIT) {
			omxExpectationCompute(oo->expectation, NULL);

			Eigen::ArrayXd &patternLik = estate->grp.patternLik;
			const int numUnique = estate->getNumUnique();
			if (state->returnRowLikelihoods) {
				// expand per-pattern likelihoods back out to data rows
				const double OneOverLargest = estate->grp.quad.getReciprocalOfOne();
				omxData *data = estate->data;
				for (int rx=0; rx < numUnique; rx++) {
					int dups = omxDataNumIdenticalRows(data, estate->grp.rowMap[rx]);
					for (int dup=0; dup < dups; dup++) {
						int dest = omxDataIndex(data, estate->grp.rowMap[rx]+dup);
						oo->matrix->data[dest] = patternLik[rx] * OneOverLargest;
					}
				}
			} else {
				// weighted sum of log pattern likelihoods; LogLargest
				// presumably compensates for internal likelihood scaling
				// -- see BA81Expect
				double *rowWeight = estate->grp.rowWeight;
				const double LogLargest = estate->LogLargestDouble;
				double got = 0;
#pragma omp parallel for num_threads(Global->numThreads) reduction(+:got)
				for (int ux=0; ux < numUnique; ux++) {
					// skip zero-likelihood patterns (reported as excluded below)
					if (patternLik[ux] == 0) continue;
					got += rowWeight[ux] * (log(patternLik[ux]) - LogLargest);
				}
				double fit = Global->llScale * got;
				if (estate->verbose >= 1) mxLog("%s: observed fit %.4f (%d/%d excluded)",
								oo->matrix->name, fit, estate->grp.excludedPatterns, numUnique);
				oo->matrix->data[0] = fit;
			}
		}
	} else {
		Rf_error("%s: Predict nothing or scores before computing %d", oo->matrix->name, want);
	}
}
1171
1172 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
1173 {
1174         if (!want) return;
1175         ba81ComputeFit(oo, want, fc);
1176 }
1177
1178 BA81FitState::~BA81FitState()
1179 {
1180         omxFreeMatrix(itemParam);
1181         omxFreeMatrix(latentMean);
1182         omxFreeMatrix(latentCov);
1183 }
1184
1185 static void ba81Destroy(omxFitFunction *oo) {
1186         BA81FitState *state = (BA81FitState *) oo->argStruct;
1187         delete state;
1188 }
1189
1190 void omxInitFitFunctionBA81(omxFitFunction* oo)
1191 {
1192         if (!oo->argStruct) { // ugh!
1193                 BA81FitState *state = new BA81FitState;
1194                 oo->argStruct = state;
1195         }
1196
1197         BA81FitState *state = (BA81FitState*) oo->argStruct;
1198
1199         omxExpectation *expectation = oo->expectation;
1200         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
1201         estate->fit = oo;
1202
1203         oo->computeFun = ba81Compute;
1204         oo->setVarGroup = ba81SetFreeVarGroup;
1205         oo->destructFun = ba81Destroy;
1206         oo->gradientAvailable = TRUE;
1207         oo->hessianAvailable = TRUE;
1208
1209         int maxParam = estate->itemParam->rows;
1210         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
1211
1212         int numItems = estate->itemParam->cols;
1213         for (int ix=0; ix < numItems; ix++) {
1214                 const double *spec = estate->itemSpec(ix);
1215                 int id = spec[RPF_ISpecID];
1216                 if (id < 0 || id >= rpf_numModels) {
1217                         Rf_error("ItemSpec %d has unknown item model %d", ix, id);
1218                 }
1219         }
1220
1221         state->itemParam = omxInitMatrix(0, 0, TRUE, globalState);
1222         state->latentMean = omxInitMatrix(0, 0, TRUE, globalState);
1223         state->latentCov = omxInitMatrix(0, 0, TRUE, globalState);
1224         state->copyEstimates(estate);
1225
1226         state->returnRowLikelihoods = Rf_asInteger(R_do_slot(oo->rObj, Rf_install("vector")));
1227 }