Enable R_NO_REMAP for a cleaner namespace
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2014 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <algorithm>
19
20 #include "omxFitFunction.h"
21 #include "omxExpectationBA81.h"
22 #include "omxOpenmpWrap.h"
23 #include "libifa-rpf.h"
24 #include "matrix.h"
25 #include "omxBuffer.h"
26
27 struct LatentParamLoc {
28         bool mean;     // false=cov
29         int row, col;
30 };
31
32 struct BA81FitState {
33         //private:
34         void copyEstimates(BA81Expect *estate);
35         void recordHGterm(int hoffset, int at1, int at2);
36         //public:
37         int haveLatentMap;
38         std::vector<int> latentMap;
39         std::vector< LatentParamLoc > latentLoc;
40         bool freeLatents;
41         int ElatentVersion;
42
43         int haveItemMap;
44         size_t numFreeParam;
45         int itemDerivPadSize;                // maxParam + maxParam*(1+maxParam)/2
46         bool freeItemParams;
47         std::vector<int> paramPerItem;       // itemParam->cols
48         std::vector<int> paramFlavor;        // numFreeParam
49         std::vector<int> paramMap;           // itemParam->cols * itemDerivPadSize -> index of free parameter
50         std::vector<int> itemGradMap;        // index of gradient -> index of free parameter
51         std::vector<int> itemParamFree;      // itemParam->cols * itemParam->rows
52         std::vector<int> ihessDivisor;       // numFreeParam * numFreeParam
53         std::vector< matrixVectorProdTerm > hgProd;
54
55         // The following are only used to compute FF_COMPUTE_MAXABSCHANGE
56         omxMatrix *itemParam;
57         omxMatrix *latentMean;
58         omxMatrix *latentCov;
59
60         BA81FitState();
61         ~BA81FitState();
62 };
63
64 // writes to upper triangle of full matrix
65 static void addSymOuterProd(const double weight, const double *vec, const int len, double *out)
66 {
67         for (int d1=0; d1 < len; ++d1) {
68                 for (int d2=0; d2 <= d1; ++d2) {
69                         out[d1 * len + d2] += weight * vec[d1] * vec[d2];
70                 }
71         }
72 }
73
74 BA81FitState::BA81FitState()
75 {
76         haveItemMap = FREEVARGROUP_INVALID;
77         haveLatentMap = FREEVARGROUP_INVALID;
78 }
79
80 void BA81FitState::copyEstimates(BA81Expect *estate)
81 {
82         omxCopyMatrix(itemParam, estate->itemParam);
83         omxCopyMatrix(latentMean, estate->latentMeanOut);
84         omxCopyMatrix(latentCov, estate->latentCovOut);
85 }
86
87 void BA81FitState::recordHGterm(int hoffset, int at1, int at2)
88 {
89         //mxLog("? H %d * g %d = p %d", hoffset, at2, at1);
90         matrixVectorProdTerm mvpt(hoffset, at2, at1);
91         hgProd.push_back(mvpt);
92
93         if (at1 != at2) {
94                 matrixVectorProdTerm mvpt(hoffset, at1, at2);
95                 hgProd.push_back(mvpt);
96         }
97 }
98
99 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
100 {
101         FreeVarGroup *fvg = fc->varGroup;
102         BA81FitState *state = (BA81FitState *) oo->argStruct;
103         std::vector<int> &latentMap = state->latentMap;
104         std::vector< LatentParamLoc > &latentLoc = state->latentLoc;
105         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
106         int meanNum = ~estate->latentMeanOut->matrixNumber;
107         int covNum = ~estate->latentCovOut->matrixNumber;
108         int maxAbilities = estate->maxAbilities;
109         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
110         const size_t numFreeParams = state->numFreeParam;
111
112         if (state->haveLatentMap == fc->varGroup->id[0]) return;
113         if (estate->verbose) mxLog("%s: rebuild latent parameter map for %d", oo->matrix->name, fc->varGroup->id[0]);
114
115         state->freeLatents = false;
116         latentMap.assign(numLatents + triangleLoc1(numLatents), -1);
117         LatentParamLoc nullLoc = { false, -1, -1 };
118         latentLoc.assign(fvg->vars.size(), nullLoc);
119
120         int numParam = int(fvg->vars.size());
121         for (int px=0; px < numParam; px++) {
122                 omxFreeVar *fv = fvg->vars[px];
123                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
124                         omxFreeVarLocation *loc = &fv->locations[lx];
125                         int matNum = loc->matrix;
126                         if (matNum == meanNum) {
127                                 latentMap[loc->row + loc->col] = px;
128                                 latentLoc[px].mean = true;
129                                 latentLoc[px].row = loc->row * loc->col;
130                                 latentLoc[px].col = 1;
131                                 state->freeLatents = true;
132                         } else if (matNum == covNum) {
133                                 int a1 = loc->row;
134                                 int a2 = loc->col;
135                                 if (a1 < a2) std::swap(a1, a2);
136                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
137                                 if (latentMap[cell] == -1) {
138                                         latentMap[cell] = px;
139
140                                         if (a1 == a2 && fv->lbound == NEG_INF) {
141                                                 fv->lbound = BA81_MIN_VARIANCE;  // variance must be positive
142                                                 if (fc->est[px] < fv->lbound) {
143                                                         Rf_error("Starting value for variance %s is negative", fv->name);
144                                                 }
145                                         }
146                                 } else if (latentMap[cell] != px) {
147                                         // doesn't detect similar problems in multigroup constraints TODO
148                                         Rf_error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
149                                               fvg->vars[latentMap[cell]]->name, fv->name);
150                                 }
151                                 latentLoc[px].mean = false;
152                                 latentLoc[px].row = a1;
153                                 latentLoc[px].col = a2;
154                                 state->freeLatents = true;
155                         }
156                 }
157         }
158         state->haveLatentMap = fc->varGroup->id[0];
159
160         for (int p1=0; p1 < numLatents; p1++) {
161                 int at1 = latentMap[p1];
162                 if (at1 < 0) continue;
163
164                 for (int p2=0; p2 <= p1; p2++) {
165                         int at2 = latentMap[p2];
166                         if (at2 < 0) continue;
167
168                         if (at1 < at2) std::swap(at1, at2);  // lower triangle
169
170                         int at = numLatents + triangleLoc1(p1) + p2;
171                         int hoffset = at1 * numFreeParams + at2;
172                         latentMap[at] = hoffset;
173
174                         // ihessDivisor for S-EM TODO
175                 }
176         }
177 }
178
179 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
180 {
181         FreeVarGroup *fvg = fc->varGroup;
182         BA81FitState *state = (BA81FitState *) oo->argStruct;
183         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
184
185         if (state->haveItemMap == fc->varGroup->id[0]) return;
186         if (estate->verbose >= 1) mxLog("%s: rebuild item parameter map for var group %d",
187                                         oo->matrix->name, fc->varGroup->id[0]);
188
189         omxMatrix *itemParam = estate->itemParam;
190         int size = itemParam->cols * state->itemDerivPadSize;
191         state->freeItemParams = false;
192         state->paramMap.assign(size, -1);  // matrix location to free param index
193         state->itemParamFree.assign(itemParam->rows * itemParam->cols, FALSE);
194
195         const size_t numFreeParams = state->numFreeParam;
196         std::vector<int> paramLocations(numFreeParams);
197         state->paramFlavor.assign(numFreeParams, -1);
198
199         int totalParam = 0;
200         state->paramPerItem.resize(itemParam->cols);
201         for (int cx=0; cx < itemParam->cols; ++cx) {
202                 const double *spec = estate->itemSpec[cx];
203                 const int id = spec[RPF_ISpecID];
204                 const int numParam = (*rpf_model[id].numParam)(spec);
205                 state->paramPerItem[cx] = numParam;
206                 totalParam += numParam;
207         }
208         state->itemGradMap.assign(totalParam, -1);
209
210         for (size_t px=0; px < numFreeParams; px++) {
211                 omxFreeVar *fv = fvg->vars[px];
212                 paramLocations[px] = int(fv->locations.size());
213                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
214                         omxFreeVarLocation *loc = &fv->locations[lx];
215                         int matNum = ~loc->matrix;
216                         if (matNum != itemParam->matrixNumber) continue;
217
218                         int at = loc->col * state->itemDerivPadSize + loc->row;
219                         state->paramMap[at] = px;
220                         state->itemParamFree[loc->col * itemParam->rows + loc->row] = TRUE;
221                         state->freeItemParams = true;
222
223                         const double *spec = estate->itemSpec[loc->col];
224                         int id = spec[RPF_ISpecID];
225                         int flavor;
226                         double upper, lower;
227                         (*rpf_model[id].paramInfo)(spec, loc->row, &flavor, &upper, &lower);
228                         if (state->paramFlavor[px] < 0) {
229                                 state->paramFlavor[px] = flavor;
230                         } else if (state->paramFlavor[px] != flavor) {
231                                 Rf_error("Cannot equate %s with %s[%d,%d]", fv->name,
232                                       itemParam->name, loc->row, loc->col);
233                         }
234                         if (fv->lbound == NEG_INF && isfinite(lower)) {
235                                 fv->lbound = lower;
236                                 if (fc->est[px] < fv->lbound) {
237                                         Rf_error("Starting value %s %f less than lower bound %f",
238                                               fv->name, fc->est[px], lower);
239                                 }
240                         }
241                         if (fv->ubound == INF && isfinite(upper)) {
242                                 fv->ubound = upper;
243                                 if (fc->est[px] > fv->ubound) {
244                                         Rf_error("Starting value %s %f greater than upper bound %f",
245                                               fv->name, fc->est[px], upper);
246                                 }
247                         }
248                 }
249         }
250
251         int gradOffset = 0;
252         for (int cx=0; cx < itemParam->cols; ++cx) {
253                 for (int rx=0; rx < state->paramPerItem[cx]; ++rx) {
254                         int at = cx * state->itemDerivPadSize + rx;
255                         int px = state->paramMap[at];
256                         if (px >= 0) state->itemGradMap[gradOffset] = px;
257                         ++gradOffset;
258                 }
259         }
260
261         state->ihessDivisor.resize(size);
262
263         for (int cx=0; cx < itemParam->cols; ++cx) {
264                 int numParam = state->paramPerItem[cx];
265                 for (int p1=0; p1 < numParam; p1++) {
266                         int at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
267                         if (at1 < 0) continue;
268
269                         for (int p2=0; p2 <= p1; p2++) {
270                                 int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
271                                 if (at2 < 0) continue;
272
273                                 if (at1 < at2) std::swap(at1, at2);  // lower triangle
274
275                                 //mxLog("Item %d param(%d,%d) -> H[%d,%d]", cx, p1, p2, at1, at2);
276                                 int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
277                                 int hoffset = at1 * numFreeParams + at2;
278
279                                 state->recordHGterm(hoffset, at1, at2);
280
281                                 state->paramMap[at] = numFreeParams + hoffset;
282
283                                 state->ihessDivisor[at] =
284                                         paramLocations[at1] * paramLocations[at2];
285                         }
286                 }
287         }
288
289         state->haveItemMap = fc->varGroup->id[0];
290         //pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
291 }
292
293 static double
294 ba81ComputeEMFit(omxFitFunction* oo, int want, FitContext *fc)
295 {
296         const double Scale = Global->llScale;
297         BA81FitState *state = (BA81FitState*) oo->argStruct;
298         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
299         omxMatrix *itemParam = estate->itemParam;
300         std::vector<const double*> &itemSpec = estate->itemSpec;
301         std::vector<int> &cumItemOutcomes = estate->cumItemOutcomes;
302         const int maxDims = estate->maxDims;
303         const size_t numItems = estate->itemSpec.size();
304         const int do_fit = want & FF_COMPUTE_FIT;
305         const int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN);
306
307         if (estate->verbose >= 2) mxLog("%s: em.fit(want fit=%d deriv=%d)", oo->matrix->name, do_fit, do_deriv);
308
309         if (do_fit) ba81OutcomeProb(estate, FALSE, TRUE);
310
311         const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
312         std::vector<double> thrDeriv(thrDerivSize * Global->numThreads);
313         double *wherePrep = estate->wherePrep.data();
314
315         double ll = 0;
316 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:ll)
317         for (size_t ix=0; ix < numItems; ix++) {
318                 const int thrId = omx_absolute_thread_num();
319                 const double *spec = estate->itemSpec[ix];
320                 const int id = spec[RPF_ISpecID];
321                 const rpf_dLL1_t dLL1 = rpf_model[id].dLL1;
322                 const int iOutcomes = estate->itemOutcomes[ix];
323                 const int outcomeBase = cumItemOutcomes[ix] * estate->totalQuadPoints;
324                 const double *weight = estate->expected + outcomeBase;
325                 const double *oProb = estate->outcomeProb + outcomeBase;
326                 const double *iparam = omxMatrixColumn(itemParam, ix);
327                 double *myDeriv = thrDeriv.data() + thrDerivSize * thrId + ix * state->itemDerivPadSize;
328
329                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
330                         if (do_fit) {
331                                 for (int ox=0; ox < iOutcomes; ox++) {
332                                         ll += weight[ox] * oProb[ox];
333                                 }
334                         }
335                         if (do_deriv) {
336                                 (*dLL1)(spec, iparam, wherePrep + qx * maxDims, weight, myDeriv);
337                         }
338                         weight += iOutcomes;
339                         oProb += iOutcomes;
340                 }
341         }
342
343         size_t excluded = 0;
344
345         if (do_deriv) {
346                 double *deriv0 = thrDeriv.data();
347
348                 int perThread = itemParam->cols * state->itemDerivPadSize;
349                 for (int th=1; th < Global->numThreads; th++) {
350                         double *thrD = thrDeriv.data() + th * perThread;
351                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
352                 }
353
354                 for (size_t ix=0; ix < numItems; ix++) {
355                         const double *spec = itemSpec[ix];
356                         int id = spec[RPF_ISpecID];
357                         double *iparam = omxMatrixColumn(itemParam, ix);
358                         double *pad = deriv0 + ix * state->itemDerivPadSize;
359                         (*rpf_model[id].dLL2)(spec, iparam, pad);
360                 }
361
362                 int numFreeParams = int(state->numFreeParam);
363                 int numParams = itemParam->cols * state->itemDerivPadSize;
364                 for (int ox=0; ox < numParams; ox++) {
365                         int to = state->paramMap[ox];
366                         if (to == -1) continue;
367
368                         // Need to check because this can happen if
369                         // lbounds/ubounds are not set appropriately.
370                         if (0 && !isfinite(deriv0[ox])) {
371                                 int item = ox / itemParam->rows;
372                                 mxLog("item parameters:\n");
373                                 const double *spec = itemSpec[item];
374                                 int id = spec[RPF_ISpecID];
375                                 int numParam = (*rpf_model[id].numParam)(spec);
376                                 double *iparam = omxMatrixColumn(itemParam, item);
377                                 pda(iparam, numParam, 1);
378                                 // Perhaps bounds can be pulled in from librpf? TODO
379                                 Rf_error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
380                                       ox, item, deriv0[ox]);
381                         }
382
383                         if (to < numFreeParams) {
384                                 if (want & FF_COMPUTE_GRADIENT) {
385                                         fc->grad[to] -= Scale * deriv0[ox];
386                                 }
387                         } else {
388                                 if (want & FF_COMPUTE_HESSIAN) {
389                                         int Hto = to - numFreeParams;
390                                         fc->hess[Hto] -= Scale * deriv0[ox];
391                                 }
392                         }
393                 }
394
395                 if (want & FF_COMPUTE_IHESSIAN) {
396                         for (size_t ix=0; ix < numItems; ix++) {
397                                 const double *spec = itemSpec[ix];
398                                 int id = spec[RPF_ISpecID];
399                                 int iParams = (*rpf_model[id].numParam)(spec);
400                                 double *pad = deriv0 + ix * state->itemDerivPadSize + iParams;
401                                 int *mask = state->itemParamFree.data() + ix * itemParam->rows;
402                                 double stress;
403                                 omxApproxInvertPackedPosDefTriangular(iParams, mask, pad, &stress);
404                                 // If items excluded then ihessDivisor is wrong TODO
405                                 if (stress) ++excluded;
406                         }
407                         for (int ox=0; ox < numParams; ox++) {
408                                 int to = state->paramMap[ox];
409                                 if (to == -1) continue;
410                                 if (to >= numFreeParams) {
411                                         int Hto = to - numFreeParams;
412                                         fc->ihess[Hto] -= deriv0[ox] / (Scale * state->ihessDivisor[ox]);
413                                 }
414                         }
415                 }
416         }
417
418         if (excluded && estate->verbose >= 1) {
419                 mxLog("%s: Hessian not positive definite for %lu/%lu items",
420                       oo->matrix->name, excluded, numItems);
421         }
422         if (excluded == numItems) {
423                 omxRaiseErrorf(globalState, "Hessian not positive definite for %lu/%lu items",
424                                excluded, numItems);
425         }
426
427         return Scale * ll;
428 }
429
430 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
431 {}
432
433 static void sandwich(omxFitFunction *oo, FitContext *fc)
434 {
435         const double abScale = fabs(Global->llScale);
436         omxExpectation *expectation = oo->expectation;
437         BA81FitState *state = (BA81FitState*) oo->argStruct;
438         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
439         if (estate->verbose >= 1) mxLog("%s: sandwich", oo->matrix->name);
440
441         ba81OutcomeProb(estate, FALSE, FALSE);
442
443         const int numThreads = Global->numThreads;
444         const int numUnique = estate->numUnique;
445         const int numSpecific = estate->numSpecific;
446         const int maxDims = estate->maxDims;
447         omxData *data = estate->data;
448         const int *rowMap = estate->rowMap;
449         int *numIdentical = estate->numIdentical;
450         const long totalQuadPoints = estate->totalQuadPoints;
451         omxMatrix *itemParam = estate->itemParam;
452         omxBuffer<double> patternLik(numUnique);
453
454         const int totalOutcomes = estate->totalOutcomes;
455         const size_t numItems = estate->itemSpec.size();
456         const size_t numParam = fc->varGroup->vars.size();
457         const double *wherePrep = estate->wherePrep.data();
458         std::vector<double> thrBreadG(numThreads * numParam * numParam);
459         std::vector<double> thrBreadH(numThreads * numParam * numParam);
460         std::vector<double> thrMeat(numThreads * numParam * numParam);
461
462         if (numSpecific == 0) {
463                 omxBuffer<double> thrLxk(totalQuadPoints * numThreads);
464
465 #pragma omp parallel for num_threads(numThreads)
466                 for (int px=0; px < numUnique; px++) {
467                         int thrId = omx_absolute_thread_num();
468                         double *lxk = thrLxk.data() + thrId * totalQuadPoints;
469                         omxBuffer<double> itemDeriv(state->itemDerivPadSize);
470                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
471                         double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
472                         double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
473                         double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
474                         std::vector<double> patGrad(numParam);
475
476                         ba81LikelihoodSlow2(estate, px, lxk);
477
478                         // If patternLik is already valid, maybe could avoid this loop TODO
479                         double patternLik1 = 0;
480                         for (long qx=0; qx < totalQuadPoints; qx++) {
481                                 patternLik1 += lxk[qx];
482                         }
483                         patternLik[px] = patternLik1;
484
485                         // if (!validPatternLik(state, patternLik1))  complain
486
487                         double weight = 1 / patternLik[px];
488                         for (long qx=0; qx < totalQuadPoints; qx++) {
489                                 double tmp = lxk[qx] * weight;
490                                 double sqrtTmp = sqrt(tmp);
491
492                                 std::vector<double> gradBuf(numParam);
493                                 int gradOffset = 0;
494
495                                 for (size_t ix=0; ix < numItems; ++ix) {
496                                         if (ix) gradOffset += state->paramPerItem[ix-1];
497                                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
498                                         if (pick == NA_INTEGER) continue;
499                                         pick -= 1;
500
501                                         const int iOutcomes = estate->itemOutcomes[ix];
502                                         OMXZERO(expected.data(), iOutcomes);
503                                         expected[pick] = 1;
504                                         const double *spec = estate->itemSpec[ix];
505                                         double *iparam = omxMatrixColumn(itemParam, ix);
506                                         const int id = spec[RPF_ISpecID];
507                                         OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
508                                         (*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
509                                                               expected.data(), itemDeriv.data());
510                                         (*rpf_model[id].dLL2)(spec, iparam, itemDeriv.data());
511
512                                         for (int par = 0; par < state->paramPerItem[ix]; ++par) {
513                                                 int to = state->itemGradMap[gradOffset + par];
514                                                 if (to >= 0) {
515                                                         gradBuf[to] -= itemDeriv[par] * sqrtTmp;
516                                                         patGrad[to] -= itemDeriv[par] * tmp;
517                                                 }
518                                         }
519                                         int derivBase = ix * state->itemDerivPadSize;
520                                         for (int ox=0; ox < state->itemDerivPadSize; ox++) {
521                                                 int to = state->paramMap[derivBase + ox];
522                                                 if (to >= int(numParam)) {
523                                                         int Hto = to - numParam;
524                                                         breadH[Hto] += abScale * itemDeriv[ox] * tmp * numIdentical[px];
525                                                 }
526                                         }
527                                 }
528                                 addSymOuterProd(abScale * numIdentical[px], gradBuf.data(), numParam, breadG);
529                         }
530                         addSymOuterProd(abScale * numIdentical[px], patGrad.data(), numParam, meat);
531                 }
532
533         } else {
534                 const long totalPrimaryPoints = estate->totalPrimaryPoints;
535                 const long specificPoints = estate->quadGridSize;
536                 omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
537                 omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
538                 omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);
539
540 #pragma omp parallel for num_threads(numThreads)
541                 for (int px=0; px < numUnique; px++) {
542                         int thrId = omx_absolute_thread_num();
543                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
544                         omxBuffer<double> itemDeriv(state->itemDerivPadSize);
545                         double *breadG = thrBreadG.data() + thrId * numParam * numParam; //a
546                         double *breadH = thrBreadH.data() + thrId * numParam * numParam; //a
547                         double *meat = thrMeat.data() + thrId * numParam * numParam;   //b
548                         std::vector<double> patGrad(numParam);
549                         double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
550                         double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
551                         double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
552                         cai2010EiEis(estate, px, lxk, Eis, Ei);
553
554                         // If patternLik is already valid, maybe could avoid this loop TODO
555                         double patternLik1 = 0;
556                         for (long qx=0; qx < totalPrimaryPoints; ++qx) {
557                                 patternLik1 += Ei[qx];
558                         }
559                         patternLik[px] = patternLik1;
560
561                         // WARNING: I didn't work out the math. I just coded this the way
562                         // it seems to make sense.
563                         for (long qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
564                                 for (long sx=0; sx < specificPoints; sx++) {
565                                         for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
566                                                 std::vector<double> gradBuf(numParam);
567                                                 int gradOffset = 0;
568                                                 double lxk1 = lxk[qloc + Sgroup];
569                                                 double Eis1 = Eis[eisloc + Sgroup];
570                                                 double tmp = Eis1 * lxk1 / patternLik1;
571                                                 double sqrtTmp = sqrt(tmp);
572                                                 for (size_t ix=0; ix < numItems; ++ix) {
573                                                         if (ix) gradOffset += state->paramPerItem[ix-1];
574                                                         if (estate->Sgroup[ix] != Sgroup) continue;
575                                                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
576                                                         if (pick == NA_INTEGER) continue;
577                                                         OMXZERO(expected.data(), estate->itemOutcomes[ix]);
578                                                         expected[pick-1] = 1;
579                                                         const double *spec = estate->itemSpec[ix];
580                                                         double *iparam = omxMatrixColumn(itemParam, ix);
581                                                         const int id = spec[RPF_ISpecID];
582                                                         OMXZERO(itemDeriv.data(), state->itemDerivPadSize);
583                                                         (*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
584                                                                               expected.data(), itemDeriv.data());
585                                                         (*rpf_model[id].dLL2)(spec, iparam, itemDeriv.data());
586
587                                                         for (int par = 0; par < state->paramPerItem[ix]; ++par) {
588                                                                 int to = state->itemGradMap[gradOffset + par];
589                                                                 if (to >= 0) {
590                                                                         gradBuf[to] -= itemDeriv[par] * sqrtTmp;
591                                                                         patGrad[to] -= itemDeriv[par] * tmp;
592                                                                 }
593                                                         }
594                                                         int derivBase = ix * state->itemDerivPadSize;
595                                                         for (int ox=0; ox < state->itemDerivPadSize; ox++) {
596                                                                 int to = state->paramMap[derivBase + ox];
597                                                                 if (to >= int(numParam)) {
598                                                                         int Hto = to - numParam;
599                                                                         breadH[Hto] += (abScale * itemDeriv[ox] *
600                                                                                         tmp * numIdentical[px]);
601                                                                 }
602                                                         }
603                                                 }
604                                                 addSymOuterProd(abScale * numIdentical[px], gradBuf.data(), numParam, breadG);
605                                         }
606                                         qloc += numSpecific;
607                                         ++qx;
608                                 }
609                         }
610                         addSymOuterProd(abScale * numIdentical[px], patGrad.data(), numParam, meat);
611                 }
612         }
613
614         // only need upper triangle TODO
615         for (int tx=1; tx < numThreads; ++tx) {
616                 double *th = thrBreadG.data() + tx * numParam * numParam;
617                 for (size_t en=0; en < numParam * numParam; ++en) {
618                         thrBreadG[en] += th[en];
619                 }
620         }
621         for (int tx=1; tx < numThreads; ++tx) {
622                 double *th = thrBreadH.data() + tx * numParam * numParam;
623                 for (size_t en=0; en < numParam * numParam; ++en) {
624                         thrBreadH[en] += th[en];
625                 }
626         }
627         for (int tx=1; tx < numThreads; ++tx) {
628                 double *th = thrMeat.data() + tx * numParam * numParam;
629                 for (size_t en=0; en < numParam * numParam; ++en) {
630                         thrMeat[en] += th[en];
631                 }
632         }
633         //pda(thrBreadG.data(), numParam, numParam);
634         //pda(thrBreadH.data(), numParam, numParam);
635         //pda(thrMeat.data(), numParam, numParam);
636         if (fc->infoA) {
637                 for (size_t d1=0; d1 < numParam; ++d1) {
638                         for (size_t d2=0; d2 < numParam; ++d2) {
639                                 int cell = d1 * numParam + d2;
640                                 fc->infoA[cell] += thrBreadH[cell] - thrBreadG[cell] + thrMeat[cell];
641                         }
642                 }
643         }
644         if (fc->infoB) {
645                 for (size_t d1=0; d1 < numParam; ++d1) {
646                         for (size_t d2=0; d2 < numParam; ++d2) {
647                                 int cell = d1 * numParam + d2;
648                                 fc->infoB[cell] += thrMeat[cell];
649                         }
650                 }
651         }
652         fc->sampleSize += data->rows; // remove? TODO
653 }
654
655 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
656 {
657         BA81FitState *state = (BA81FitState*) oo->argStruct;
658         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
659         std::vector<int> &latentMap = state->latentMap;
660         int maxAbilities = estate->maxAbilities;
661         omxMatrix *estMean = estate->estLatentMean;
662         omxMatrix *estCov = estate->estLatentCov;
663
664         for (int a1 = 0; a1 < maxAbilities; ++a1) {
665                 if (latentMap[a1] >= 0) {
666                         int to = latentMap[a1];
667                         fc->est[to] = omxVectorElement(estMean, a1);
668                 }
669
670                 for (int a2 = 0; a2 <= a1; ++a2) {
671                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
672                         if (to < 0) continue;
673                         fc->est[to] = omxMatrixElement(estCov, a1, a2);
674                 }
675         }
676
677         if (estate->verbose >= 1) {
678                 mxLog("%s: set latent parameters for version %d",
679                       oo->matrix->name, estate->ElatentVersion);
680         }
681 }
682
683 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, double piece,
684                            double *derivCoef, double *derivOut)
685 {
686         const int maxAbilities = estate->maxAbilities;
687         const int pmax = estate->numSpecific? estate->maxDims - 1 : estate->maxDims;
688
689         int cx = 0;
690         for (int d1=0; d1 < pmax; ++d1) {
691                 double amt1 = piece * derivCoef[d1];
692                 derivOut[d1] += amt1;
693                 for (int d2=0; d2 <= d1; ++d2) {
694                         int to = maxAbilities + cx;
695                         double amt2 = piece * derivCoef[pmax + cx];
696                         derivOut[to] += amt2;
697                         ++cx;
698                 }
699         }
700 }
701
702 static void mapLatentDerivS(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
703                             double *derivCoef, double *derivOut)
704 {
705         int maxAbilities = estate->maxAbilities;
706         int maxDims = estate->maxDims;
707         int pmax = maxDims;
708         if (estate->numSpecific) pmax -= 1;
709
710         int sdim = pmax + sgroup;
711         double amt3 = piece * derivCoef[0];
712         derivOut[sdim] += amt3;
713
714         double amt4 = piece * derivCoef[1];
715         int to = maxAbilities + triangleLoc0(sdim);
716         derivOut[to] += amt4;
717 }
718
719 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate, omxBuffer<double> &icov,
720                           const double *where, double *derivCoef)
721 {
722         omxMatrix *mean = estate->latentMeanOut;
723         omxMatrix *cov = estate->latentCovOut;
724         const int pDims = estate->numSpecific? estate->maxDims - 1 : estate->maxDims;
725         const char R='R';
726         const char L='L';
727         const char U='U';
728         const double alpha = 1;
729         const double beta = 0;
730         const int one = 1;
731
732         std::vector<double> whereDiff(pDims);
733         std::vector<double> whereGram(triangleLoc1(pDims));
734         for (int d1=0; d1 < pDims; ++d1) {
735                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
736         }
737         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
738
739         F77_CALL(dsymv)(&U, &pDims, &alpha, icov.data(), &pDims, whereDiff.data(), &one,
740                         &beta, derivCoef, &one);
741
742         std::vector<double> covGrad1(pDims * pDims);
743         std::vector<double> covGrad2(pDims * pDims);
744
745         int cx=0;
746         for (int d1=0; d1 < pDims; ++d1) {
747                 for (int d2=0; d2 <= d1; ++d2) {
748                         covGrad1[d2 * pDims + d1] = omxMatrixElement(cov, d2, d1) - whereGram[cx];
749                         ++cx;
750                 }
751         }
752
753         F77_CALL(dsymm)(&R, &L, &pDims, &pDims, &alpha, covGrad1.data(), &pDims, icov.data(),
754                         &pDims, &beta, covGrad2.data(), &pDims);
755         F77_CALL(dsymm)(&R, &L, &pDims, &pDims, &alpha, icov.data(), &pDims, covGrad2.data(),
756                         &pDims, &beta, covGrad1.data(), &pDims);
757
758         for (int d1=0; d1 < pDims; ++d1) {
759                 covGrad1[d1 * pDims + d1] /= 2.0;
760         }
761
762         cx = pDims;
763         for (int d1=0; d1 < pDims; ++d1) {
764                 int cell = d1 * pDims;
765                 for (int d2=0; d2 <= d1; ++d2) {
766                         derivCoef[cx] = -covGrad1[cell + d2];
767                         ++cx;
768                 }
769         }
770 }
771
772 static void calcDerivCoef1(BA81FitState *state, BA81Expect *estate,
773                            const double *where, int sgroup, double *derivCoef)
774 {
775         omxMatrix *mean = estate->latentMeanOut;
776         omxMatrix *cov = estate->latentCovOut;
777         const int maxDims = estate->maxDims;
778         const int specific = maxDims - 1 + sgroup;
779         double svar = omxMatrixElement(cov, specific, specific);
780         double whereDiff = where[maxDims-1] - omxVectorElement(mean, specific);
781         derivCoef[0] = whereDiff / svar;
782         derivCoef[1] = -(svar - whereDiff * whereDiff) / (2 * svar * svar);
783 }
784
785 static void gradCov_finish_1pat(const double weight, const int numIdentical, const size_t numItems,
786                             const int numLatents, const size_t numParam,
787                             BA81FitState *state, BA81Expect *estate, omxMatrix *itemParam,
788                             std::vector<double> &deriv0, std::vector<double> &latentGrad,
789                             const double Scale,
790                             std::vector<double> &patGrad,
791                             double *grad, double *meat)
792 {
793         int gradOffset = 0;
794         for (size_t ix=0; ix < numItems; ++ix) {
795                 const double *spec = estate->itemSpec[ix];
796                 double *iparam = omxMatrixColumn(itemParam, ix);
797                 const int id = spec[RPF_ISpecID];
798                 double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
799                 (*rpf_model[id].dLL2)(spec, iparam, myDeriv);
800
801                 for (int par = 0; par < state->paramPerItem[ix]; ++par) {
802                         int to = state->itemGradMap[gradOffset];
803                         if (to >= 0) patGrad[to] -= weight * myDeriv[par];
804                         ++gradOffset;
805                 }
806         }
807
808         for (int lx=0; lx < numLatents; ++lx) {
809                 int to = state->latentMap[lx];
810                 if (to >= 0) patGrad[to] += weight * latentGrad[lx];
811         }
812         for (size_t par=0; par < numParam; ++par) {
813                 grad[par] += patGrad[par] * Scale * numIdentical;
814         }
815         addSymOuterProd(fabs(Scale) * numIdentical, patGrad.data(), numParam, meat);
816 }
817
818 static bool gradCov(omxFitFunction *oo, FitContext *fc)
819 {
820         const double Scale = Global->llScale;
821         omxExpectation *expectation = oo->expectation;
822         BA81FitState *state = (BA81FitState*) oo->argStruct;
823         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
824         if (estate->verbose >= 1) mxLog("%s: cross product approximation", oo->matrix->name);
825
826         ba81OutcomeProb(estate, FALSE, FALSE);
827
828         const int numThreads = Global->numThreads;
829         const int numUnique = estate->numUnique;
830         const int numSpecific = estate->numSpecific;
831         const int maxDims = estate->maxDims;
832         const int pDims = numSpecific? maxDims-1 : maxDims;
833         const int maxAbilities = estate->maxAbilities;
834         omxMatrix *cov = estate->latentCovOut;
835         omxData *data = estate->data;
836         const int *rowMap = estate->rowMap;
837         int *numIdentical = estate->numIdentical;
838         const long totalQuadPoints = estate->totalQuadPoints;
839         omxMatrix *itemParam = estate->itemParam;
840         omxBuffer<double> patternLik(numUnique);
841
842         omxBuffer<double> icovBuffer(pDims * pDims);
843         for (int d1=0; d1 < pDims; ++d1) {
844                 for (int d2=0; d2 < pDims; ++d2) {
845                         icovBuffer[d1 * pDims + d2] = omxMatrixElement(cov, d1, d2);
846                 }
847         }
848         Matrix icovMat(icovBuffer.data(), pDims, pDims);
849         int info = InvertSymmetricPosDef(icovMat, 'U');
850         if (info) return FALSE;
851
852         // fill in rest from upper triangle
853         for (int rx=1; rx < pDims; ++rx) {
854                 for (int cx=0; cx < rx; ++cx) {
855                         icovBuffer[cx * pDims + rx] = icovBuffer[rx * pDims + cx];
856                 }
857         }
858
859         const int priDerivCoef = pDims + triangleLoc1(pDims);
860         const int numLatents = maxAbilities + triangleLoc1(maxAbilities);
861         const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
862         const int totalOutcomes = estate->totalOutcomes;
863         const size_t numItems = state->freeItemParams? estate->itemSpec.size() : 0;
864         const size_t numParam = fc->varGroup->vars.size();
865         std::vector<double> thrGrad(numThreads * numParam);
866         std::vector<double> thrMeat(numThreads * numParam * numParam);
867         const double *wherePrep = estate->wherePrep.data();
868
869         if (numSpecific == 0) {
870                 omxBuffer<double> thrLxk(totalQuadPoints * numThreads);
871                 omxBuffer<double> derivCoef(totalQuadPoints * priDerivCoef);
872
873 #pragma omp parallel for num_threads(numThreads)
874                 for (long qx=0; qx < totalQuadPoints; qx++) {
875                         const double *where = wherePrep + qx * maxDims;
876                         calcDerivCoef(state, estate, icovBuffer, where,
877                                       derivCoef.data() + qx * priDerivCoef);
878                 }
879
880 #pragma omp parallel for num_threads(numThreads)
881                 for (int px=0; px < numUnique; px++) {
882                         int thrId = omx_absolute_thread_num();
883                         double *lxk = thrLxk.data() + thrId * totalQuadPoints;
884                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
885                         std::vector<double> deriv0(thrDerivSize);
886                         std::vector<double> latentGrad(numLatents);
887                         std::vector<double> patGrad(numParam);
888                         double *grad = thrGrad.data() + thrId * numParam;
889                         double *meat = thrMeat.data() + thrId * numParam * numParam;
890                         ba81LikelihoodSlow2(estate, px, lxk);
891
892                         // If patternLik is already valid, maybe could avoid this loop TODO
893                         double patternLik1 = 0;
894                         for (long qx=0; qx < totalQuadPoints; qx++) {
895                                 patternLik1 += lxk[qx];
896                         }
897                         patternLik[px] = patternLik1;
898
899                         // if (!validPatternLik(state, patternLik1))  complain, TODO
900
901                         for (long qx=0; qx < totalQuadPoints; qx++) {
902                                 double tmp = lxk[qx];
903                                 mapLatentDeriv(state, estate, tmp, derivCoef.data() + qx * priDerivCoef,
904                                                latentGrad.data());
905
906                                 for (size_t ix=0; ix < numItems; ++ix) {
907                                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
908                                         if (pick == NA_INTEGER) continue;
909                                         OMXZERO(expected.data(), estate->itemOutcomes[ix]);
910                                         expected[pick-1] = tmp;
911                                         const double *spec = estate->itemSpec[ix];
912                                         double *iparam = omxMatrixColumn(itemParam, ix);
913                                         const int id = spec[RPF_ISpecID];
914                                         double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
915                                         (*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
916                                                               expected.data(), myDeriv);
917                                 }
918                         }
919
920                         gradCov_finish_1pat(1 / patternLik1, numIdentical[px], numItems, numLatents, numParam,
921                                         state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);
922                 }
923         } else {
924                 const long totalPrimaryPoints = estate->totalPrimaryPoints;
925                 const long specificPoints = estate->quadGridSize;
926                 omxBuffer<double> thrLxk(totalQuadPoints * numSpecific * numThreads);
927                 omxBuffer<double> thrEi(totalPrimaryPoints * numThreads);
928                 omxBuffer<double> thrEis(totalPrimaryPoints * numSpecific * numThreads);
929                 const int derivPerPoint = priDerivCoef + 2 * numSpecific;
930                 omxBuffer<double> derivCoef(totalQuadPoints * derivPerPoint);
931
932 #pragma omp parallel for num_threads(numThreads)
933                 for (long qx=0; qx < totalQuadPoints; qx++) {
934                         const double *where = wherePrep + qx * maxDims;
935                         calcDerivCoef(state, estate, icovBuffer, where,
936                                       derivCoef.data() + qx * derivPerPoint);
937                         for (int Sgroup=0; Sgroup < numSpecific; ++Sgroup) {
938                                 calcDerivCoef1(state, estate, where, Sgroup,
939                                                derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup);
940                         }
941                 }
942
943 #pragma omp parallel for num_threads(numThreads)
944                 for (int px=0; px < numUnique; px++) {
945                         int thrId = omx_absolute_thread_num();
946                         double *lxk = thrLxk.data() + totalQuadPoints * numSpecific * thrId;
947                         double *Ei = thrEi.data() + totalPrimaryPoints * thrId;
948                         double *Eis = thrEis.data() + totalPrimaryPoints * numSpecific * thrId;
949                         omxBuffer<double> expected(totalOutcomes); // can use maxOutcomes instead TODO
950                         std::vector<double> deriv0(thrDerivSize);
951                         std::vector<double> latentGrad(numLatents);
952                         std::vector<double> patGrad(numParam);
953                         double *grad = thrGrad.data() + thrId * numParam;
954                         double *meat = thrMeat.data() + thrId * numParam * numParam;
955                         cai2010EiEis(estate, px, lxk, Eis, Ei);
956
957                         for (long qloc=0, eisloc=0, qx=0; eisloc < totalPrimaryPoints * numSpecific; eisloc += numSpecific) {
958                                 for (long sx=0; sx < specificPoints; sx++) {
959                                         mapLatentDeriv(state, estate, Eis[eisloc] * lxk[qloc],
960                                                        derivCoef.data() + qx * derivPerPoint,
961                                                        latentGrad.data());
962
963                                         for (int Sgroup=0; Sgroup < numSpecific; Sgroup++) {
964                                                 double lxk1 = lxk[qloc];
965                                                 double Eis1 = Eis[eisloc + Sgroup];
966                                                 double tmp = Eis1 * lxk1;
967                                                 mapLatentDerivS(state, estate, Sgroup, tmp,
968                                                                 derivCoef.data() + qx * derivPerPoint + priDerivCoef + 2 * Sgroup,
969                                                                 latentGrad.data());
970
971                                                 for (size_t ix=0; ix < numItems; ++ix) {
972                                                         if (estate->Sgroup[ix] != Sgroup) continue;
973                                                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
974                                                         if (pick == NA_INTEGER) continue;
975                                                         OMXZERO(expected.data(), estate->itemOutcomes[ix]);
976                                                         expected[pick-1] = tmp;
977                                                         const double *spec = estate->itemSpec[ix];
978                                                         double *iparam = omxMatrixColumn(itemParam, ix);
979                                                         const int id = spec[RPF_ISpecID];
980                                                         double *myDeriv = deriv0.data() + ix * state->itemDerivPadSize;
981                                                         (*rpf_model[id].dLL1)(spec, iparam, wherePrep + qx * maxDims,
982                                                                               expected.data(), myDeriv);
983                                                 }
984                                                 ++qloc;
985                                         }
986                                         ++qx;
987                                 }
988                         }
989
990                         // If patternLik is already valid, maybe could avoid this loop TODO
991                         double patternLik1 = 0;
992                         for (long qx=0; qx < totalPrimaryPoints; ++qx) {
993                                 patternLik1 += Ei[qx];
994                         }
995                         patternLik[px] = patternLik1;
996
997                         gradCov_finish_1pat(1 / patternLik1, numIdentical[px], numItems, numLatents, numParam,
998                                         state, estate, itemParam, deriv0, latentGrad, Scale, patGrad, grad, meat);
999                 }
1000         }
1001
1002         for (int tx=1; tx < numThreads; ++tx) {
1003                 double *th = thrGrad.data() + tx * numParam;
1004                 for (size_t en=0; en < numParam; ++en) {
1005                         thrGrad[en] += th[en];
1006                 }
1007         }
1008         for (int tx=1; tx < numThreads; ++tx) {
1009                 double *th = thrMeat.data() + tx * numParam * numParam;
1010                 for (size_t en=0; en < numParam * numParam; ++en) {
1011                         thrMeat[en] += th[en];
1012                 }
1013         }
1014         for (size_t d1=0; d1 < numParam; ++d1) {
1015                 fc->grad[d1] += thrGrad[d1];
1016         }
1017         if (fc->infoB) {
1018                 for (size_t d1=0; d1 < numParam; ++d1) {
1019                         for (size_t d2=0; d2 < numParam; ++d2) {
1020                                 int cell = d1 * numParam + d2;
1021                                 fc->infoB[cell] += thrMeat[cell];
1022                         }
1023                 }
1024         }
1025
1026         return TRUE;
1027 }
1028
1029 static void CDlatentHessian(omxFitFunction* oo, FitContext *fc)
1030 {
1031         const double Scale = Global->llScale;
1032         BA81FitState *state = (BA81FitState*) oo->argStruct;
1033         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
1034         const int maxAbilities = estate->maxAbilities;
1035         omxMatrix *cov = estate->latentCovOut;
1036         omxData *data = estate->data;
1037         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
1038         std::vector<int> &latentMap = state->latentMap;
1039
1040         if (estate->verbose >= 1) mxLog("%s: latentHessian", oo->matrix->name);
1041
1042         omxBuffer<double> icovBuffer(maxAbilities * maxAbilities);
1043         memcpy(icovBuffer.data(), cov->data, sizeof(double) * maxAbilities * maxAbilities);
1044         Matrix icovMat(icovBuffer.data(), maxAbilities, maxAbilities);
1045         int info = InvertSymmetricPosDef(icovMat, 'U');
1046         if (info) return;
1047
1048         for (int m1=0; m1 < maxAbilities; ++m1) {
1049                 for (int m2=0; m2 < m1; ++m2) {
1050                         icovBuffer[m2 * maxAbilities + m1] = icovBuffer[m1 * maxAbilities + m2];
1051                 }
1052         }
1053
1054         int px=numLatents;
1055         for (int m1=0; m1 < maxAbilities; ++m1) {
1056                 for (int m2=0; m2 <= m1; ++m2) {
1057                         int to = latentMap[px];
1058                         ++px;
1059                         if (to < 0) continue;
1060                         fc->hess[to] -= Scale * data->rows * icovBuffer[m1 * maxAbilities + m2];
1061                 }
1062         }
1063
1064         std::vector<double> term1(maxAbilities * maxAbilities);
1065         std::vector<double> term2(maxAbilities * maxAbilities);
1066
1067         int f1=0;
1068         for (int r1=0; r1 < maxAbilities; ++r1) {
1069                 for (int c1=0; c1 <= r1; ++c1) {
1070                         memcpy(term1.data()      + c1 * maxAbilities,
1071                                icovBuffer.data() + r1 * maxAbilities, maxAbilities * sizeof(double));
1072                         if (r1 != c1) {
1073                                 memcpy(term1.data()      + r1 * maxAbilities,
1074                                        icovBuffer.data() + c1 * maxAbilities, maxAbilities * sizeof(double));
1075                         }
1076                         int f2 = f1;
1077                         for (int r2=r1; r2 < maxAbilities; ++r2) {
1078                                 for (int c2 = (r1==r2? c1 : 0); c2 <= r2; ++c2) {
1079                                         int to = latentMap[numLatents + triangleLoc1(f2 + maxAbilities) + f1 + maxAbilities];
1080                                         ++f2;
1081                                         if (to < 0) continue;
1082
1083                                         memcpy(term2.data()      + c2 * maxAbilities,
1084                                                icovBuffer.data() + r2 * maxAbilities, maxAbilities * sizeof(double));
1085                                         if (r2 != c2) {
1086                                                 memcpy(term2.data()      + r2 * maxAbilities,
1087                                                        icovBuffer.data() + c2 * maxAbilities, maxAbilities * sizeof(double));
1088                                         }
1089
1090                                         double tr = 0;
1091                                         for (int d1=0; d1 < maxAbilities; ++d1) {
1092                                                 for (int d2=0; d2 < maxAbilities; ++d2) {
1093                                                         tr += term1[d2 * maxAbilities + d1] * term2[d1 * maxAbilities + d2];
1094                                                 }
1095                                         }
1096                                         // Simulation suggests the sample size should be
1097                                         // data->rows-2 but this is tedious to accomodate
1098                                         // when there are latent distribution parameter
1099                                         // equality constraints. Whether the sample size is adjusted
1100                                         // or not seems to make no detectable difference in tests.
1101                                         fc->hess[to] += Scale * data->rows * -.5 * tr;
1102
1103                                         OMXZERO(term2.data() + c2 * maxAbilities, maxAbilities);
1104                                         if (c2 != r2) OMXZERO(term2.data() + r2 * maxAbilities, maxAbilities);
1105                                 }
1106                         }
1107                         OMXZERO(term1.data() + c1 * maxAbilities, maxAbilities);
1108                         if (c1 != r1) OMXZERO(term1.data() + r1 * maxAbilities, maxAbilities);
1109                         ++f1;
1110                 }
1111         }
1112 }
1113
1114 static double
1115 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
1116 {
1117         BA81FitState *state = (BA81FitState*) oo->argStruct;
1118         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
1119         if (fc) state->numFreeParam = fc->varGroup->vars.size();
1120
1121         if (estate->type == EXPECTATION_AUGMENTED) {
1122                 buildItemParamMap(oo, fc);
1123
1124                 if (want & FF_COMPUTE_PARAMFLAVOR) {
1125                         for (size_t px=0; px < state->numFreeParam; ++px) {
1126                                 if (state->paramFlavor[px] < 0) continue;
1127                                 fc->flavor[px] = state->paramFlavor[px];
1128                         }
1129                         return 0;
1130                 }
1131
1132                 if (want & FF_COMPUTE_HGPROD) {
1133                         for (size_t px=0; px < state->hgProd.size(); ++px) {
1134                                 fc->hgProd.push_back(state->hgProd[px]);
1135                         }
1136                         return 0;
1137                 }
1138
1139                 if (want & FF_COMPUTE_PREOPTIMIZE) {
1140                         omxExpectationCompute(oo->expectation, NULL);
1141                         return 0;
1142                 }
1143
1144                 if (want & FF_COMPUTE_INFO) {
1145                         buildLatentParamMap(oo, fc);
1146                         buildItemParamMap(oo, fc);
1147                         ba81SetupQuadrature(oo->expectation);
1148
1149                         if (fc->infoMethod == INFO_METHOD_HESSIAN) {
1150                                 ba81ComputeEMFit(oo, FF_COMPUTE_HESSIAN, fc);
1151                                 if (state->freeLatents) {
1152                                         CDlatentHessian(oo, fc);
1153                                 }
1154                         } else {
1155                                 omxRaiseErrorf(globalState, "Information matrix approximation method %d is not available",
1156                                                fc->infoMethod);
1157                                 return INFINITY;
1158                         }
1159                         return 0;
1160                 }
1161
1162                 double got = ba81ComputeEMFit(oo, want, fc);
1163                 return got;
1164         } else if (estate->type == EXPECTATION_OBSERVED) {
1165
1166                 if (want == FF_COMPUTE_STARTING) {
1167                         buildLatentParamMap(oo, fc);
1168                         if (state->freeLatents) setLatentStartingValues(oo, fc);
1169                         return 0;
1170                 }
1171
1172                 if (want & (FF_COMPUTE_INFO | FF_COMPUTE_GRADIENT)) {
1173                         buildLatentParamMap(oo, fc); // only to check state->freeLatents
1174                         buildItemParamMap(oo, fc);
1175                         ba81SetupQuadrature(oo->expectation);
1176
1177                         if (state->freeLatents) {
1178                                 if (want & FF_COMPUTE_INFO && fc->infoMethod != INFO_METHOD_MEAT) {
1179                                         omxRaiseErrorf(globalState, "Information matrix approximation method %d is not available",
1180                                                        fc->infoMethod);
1181                                 }
1182                                 if (!gradCov(oo, fc)) return INFINITY;
1183                         } else {
1184                                 sandwich(oo, fc);
1185                         }
1186                 }
1187                 if (want & FF_COMPUTE_HESSIAN) {
1188                         Rf_warning("%s: Hessian not available", oo->matrix->name);
1189                 }
1190
1191                 if (want & FF_COMPUTE_MAXABSCHANGE) {
1192                         double mac = std::max(omxMaxAbsDiff(state->itemParam, estate->itemParam),
1193                                               omxMaxAbsDiff(state->latentMean, estate->latentMeanOut));
1194                         fc->mac = std::max(mac, omxMaxAbsDiff(state->latentCov, estate->latentCovOut));
1195                         state->copyEstimates(estate);
1196                 }
1197
1198                 if (want & FF_COMPUTE_FIT) {
1199                         omxExpectationCompute(oo->expectation, NULL);
1200
1201                         double *patternLik = estate->patternLik;
1202                         int *numIdentical = estate->numIdentical;
1203                         int numUnique = estate->numUnique;
1204                         estate->excludedPatterns = 0;
1205                         const double LogLargest = estate->LogLargestDouble;
1206                         double got = 0;
1207 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:got)
1208                         for (int ux=0; ux < numUnique; ux++) {
1209                                 if (!validPatternLik(estate, patternLik[ux])) {
1210 #pragma omp atomic
1211                                         ++estate->excludedPatterns;
1212                                         // somehow indicate that this -2LL is provisional TODO
1213                                         continue;
1214                                 }
1215                                 got += numIdentical[ux] * (log(patternLik[ux]) - LogLargest);
1216                         }
1217                         double fit = Global->llScale * got;
1218                         if (estate->verbose >= 1) mxLog("%s: observed fit %.4f (%d/%d excluded)",
1219                                                         oo->matrix->name, fit, estate->excludedPatterns, numUnique);
1220                         return fit;
1221                 }
1222
1223                 return 0;
1224         } else {
1225                 Rf_error("%s: Select EM or regular mode before computing %d", oo->matrix->name, want);
1226         }
1227 }
1228
1229 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
1230 {
1231         if (!want) return;
1232         double got = ba81ComputeFit(oo, want, fc);
1233         if (got) oo->matrix->data[0] = got;
1234 }
1235
1236 BA81FitState::~BA81FitState()
1237 {
1238         omxFreeAllMatrixData(itemParam);
1239         omxFreeAllMatrixData(latentMean);
1240         omxFreeAllMatrixData(latentCov);
1241 }
1242
1243 static void ba81Destroy(omxFitFunction *oo) {
1244         BA81FitState *state = (BA81FitState *) oo->argStruct;
1245         delete state;
1246 }
1247
1248 void omxInitFitFunctionBA81(omxFitFunction* oo)
1249 {
1250         if (!oo->argStruct) { // ugh!
1251                 BA81FitState *state = new BA81FitState;
1252                 oo->argStruct = state;
1253         }
1254
1255         BA81FitState *state = (BA81FitState*) oo->argStruct;
1256
1257         omxExpectation *expectation = oo->expectation;
1258         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
1259
1260         //newObj->data = oo->expectation->data;
1261
1262         oo->computeFun = ba81Compute;
1263         oo->setVarGroup = ba81SetFreeVarGroup;
1264         oo->destructFun = ba81Destroy;
1265         oo->gradientAvailable = TRUE;
1266         oo->hessianAvailable = TRUE;
1267         oo->parametersHaveFlavor = TRUE;
1268
1269         int maxParam = estate->itemParam->rows;
1270         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
1271
1272         int numItems = estate->itemParam->cols;
1273         for (int ix=0; ix < numItems; ix++) {
1274                 const double *spec = estate->itemSpec[ix];
1275                 int id = spec[RPF_ISpecID];
1276                 if (id < 0 || id >= rpf_numModels) {
1277                         Rf_error("ItemSpec %d has unknown item model %d", ix, id);
1278                 }
1279         }
1280
1281         state->itemParam = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
1282         state->latentMean = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
1283         state->latentCov = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
1284         state->copyEstimates(estate);
1285 }