Rewrite Newton-Raphson with better math
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 struct BA81FitState {
24
25         bool haveLatentMap;
26         std::vector<int> latentMap;
27
28         bool haveItemMap;
29         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
30         std::vector<int> paramMap;            // itemParam->cols * itemDerivPadSize -> index of free parameter
31         std::vector<size_t> paramLocations;   // itemParam->cols * itemDerivPadSize -> # of locations
32
33         omxMatrix *cholCov;
34         int choleskyError;
35         double *tmpLatentMean;    // maxDims
36         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
37         omxMatrix *icov;          // inverse covariance matrix
38         int fitCount;             // dubious, remove? TODO
39         int gradientCount;        // dubious, remove? TODO
40
41         std::vector< FreeVarGroup* > varGroups;
42         size_t numItemParam;
43
44         BA81FitState();
45         ~BA81FitState();
46 };
47
48 BA81FitState::BA81FitState()
49 {
50         fitCount = 0;
51         gradientCount = 0;
52         tmpLatentMean = NULL;
53         tmpLatentCov = NULL;
54         haveItemMap = false;
55         haveLatentMap = false;
56 }
57
58 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
59 {
60         FreeVarGroup *fvg = fc->varGroup;
61         BA81FitState *state = (BA81FitState *) oo->argStruct;
62         std::vector<int> &latentMap = state->latentMap;
63         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
64         int meanNum = estate->latentMeanOut->matrixNumber;
65         int covNum = estate->latentCovOut->matrixNumber;
66         int maxAbilities = estate->maxAbilities;
67         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
68
69         latentMap.assign(numLatents, -1);
70
71         int numParam = int(fvg->vars.size());
72         for (int px=0; px < numParam; px++) {
73                 omxFreeVar *fv = fvg->vars[px];
74                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
75                         omxFreeVarLocation *loc = &fv->locations[lx];
76                         int matNum = ~loc->matrix;
77                         if (matNum == meanNum) {
78                                 latentMap[loc->row + loc->col] = px;
79                         } else if (matNum == covNum) {
80                                 int a1 = loc->row;
81                                 int a2 = loc->col;
82                                 if (a1 < a2) std::swap(a1, a2);
83                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
84                                 if (latentMap[cell] == -1)
85                                         latentMap[cell] = px;
86                                 else if (latentMap[cell] != px) {
87                                         // doesn't work for multigroup constraints TODO
88                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
89                                               fvg->vars[latentMap[cell]]->name, fv->name);
90                                 }
91                                 if (a1 == a2 && fv->lbound == NEG_INF) {
92                                         fv->lbound = 1e-6;  // variance must be positive
93                                         if (fc->est[px] < fv->lbound) {
94                                                 error("Starting value for variance %s is negative", fv->name);
95                                         }
96                                 }
97                         }
98                 }
99         }
100         state->haveLatentMap = TRUE;
101 }
102
103 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
104 {
105         FreeVarGroup *fvg = fc->varGroup;
106         BA81FitState *state = (BA81FitState *) oo->argStruct;
107         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
108         omxMatrix *itemParam = estate->itemParam;
109         int size = itemParam->cols * state->itemDerivPadSize;
110         state->paramMap.assign(size, -1);  // matrix location to free param index
111         state->paramLocations.assign(size, 0);
112
113         size_t numFreeParams = state->numItemParam = fvg->vars.size();
114         int *pRow = Realloc(NULL, numFreeParams, int);
115         int *pCol = Realloc(NULL, numFreeParams, int);
116
117         for (size_t px=0; px < numFreeParams; px++) {
118                 pRow[px] = -1;
119                 pCol[px] = -1;
120                 omxFreeVar *fv = fvg->vars[px];
121                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
122                         omxFreeVarLocation *loc = &fv->locations[lx];
123                         int matNum = ~loc->matrix;
124                         if (matNum == itemParam->matrixNumber) {
125                                 pRow[px] = loc->row;
126                                 pCol[px] = loc->col;
127                                 int at = pCol[px] * state->itemDerivPadSize + pRow[px];
128                                 state->paramMap[at] = px;
129                                 state->paramLocations[at] = fv->locations.size();
130
131                                 const double *spec = estate->itemSpec[loc->col];
132                                 int id = spec[RPF_ISpecID];
133                                 double upper, lower;
134                                 (*rpf_model[id].paramBound)(spec, loc->row, &upper, &lower);
135                                 if (fv->lbound == NEG_INF && isfinite(lower)) {
136                                         fv->lbound = lower;
137                                         if (fc->est[px] < fv->lbound) {
138                                                 error("Starting value %s %f less than lower bound %f",
139                                                       fv->name, fc->est[px], lower);
140                                         }
141                                 }
142                                 if (fv->ubound == INF && isfinite(upper)) {
143                                         fv->ubound = upper;
144                                         if (fc->est[px] > fv->ubound) {
145                                                 error("Starting value %s %f greater than upper bound %f",
146                                                       fv->name, fc->est[px], upper);
147                                         }
148                                 }
149                         }
150                 }
151         }
152
153         for (size_t p1=0; p1 < numFreeParams; p1++) {
154                 for (size_t p2=p1; p2 < numFreeParams; p2++) {
155                         if (pCol[p1] == -1 || pCol[p1] != pCol[p2]) continue;
156                         const double *spec = estate->itemSpec[pCol[p1]];
157                         int id = spec[RPF_ISpecID];
158                         int numParam = (*rpf_model[id].numParam)(spec);
159                         int r1 = pRow[p1];
160                         int r2 = pRow[p2];
161                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
162                         int rowOffset = 0;
163                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
164                         int at = pCol[p1] * state->itemDerivPadSize + numParam + rowOffset + r1;
165                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
166                 }
167         }
168
169         Free(pRow);
170         Free(pCol);
171
172         state->haveItemMap = TRUE;
173 }
174
175 OMXINLINE static double
176 ba81Fit1Ordinate(omxFitFunction* oo, const int *quad, const double *weight, int want, double *myDeriv)
177 {
178         BA81FitState *state = (BA81FitState*) oo->argStruct;
179         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
180         omxMatrix *itemParam = estate->itemParam;
181         int numItems = itemParam->cols;
182         int maxOutcomes = estate->maxOutcomes;
183         int maxDims = estate->maxDims;
184         int do_fit = want & FF_COMPUTE_FIT;
185         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
186
187         double where[maxDims];
188         pointToWhere(estate, quad, where, maxDims);
189
190         double *outcomeProb = NULL;
191         if (do_fit) {
192                 outcomeProb = computeRPF(estate, itemParam, quad, TRUE); // avoid malloc/free? TODO
193                 if (!outcomeProb) return 0;
194         }
195
196         double thr_ll = 0;
197         for (int ix=0; ix < numItems; ix++) {
198                 const double *spec = estate->itemSpec[ix];
199                 int id = spec[RPF_ISpecID];
200                 int iOutcomes = spec[RPF_ISpecOutcomes];
201
202                 double area = areaProduct(estate, quad, estate->Sgroup[ix]);
203                 if (do_fit) {
204                         for (int ox=0; ox < iOutcomes; ox++) {
205 #if 0
206 #pragma omp critical(ba81Fit1OrdinateDebug1)
207                                 if (!std::isfinite(outcomeProb[ix * maxOutcomes + ox])) {
208                                         pda(itemParam->data, itemParam->rows, itemParam->cols);
209                                         pda(outcomeProb, outcomes, numItems);
210                                         error("RPF produced NAs");
211                                 }
212 #endif
213                                 double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
214                                 thr_ll += got * area;
215                         }
216                 }
217
218                 if (do_deriv) {
219                         double *iparam = omxMatrixColumn(itemParam, ix);
220                         double *pad = myDeriv + ix * state->itemDerivPadSize;
221                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
222                 }
223                 weight += iOutcomes;
224         }
225
226         Free(outcomeProb);
227
228         return thr_ll;
229 }
230
231 static double
232 ba81ComputeMFit1(omxFitFunction* oo, int want, double *gradient, double *hessian)
233 {
234         BA81FitState *state = (BA81FitState*) oo->argStruct;
235         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
236         if (estate->verbose) mxLog("%s: fit-%d", oo->matrix->name, want);
237
238         omxMatrix *customPrior = estate->customPrior;
239         omxMatrix *itemParam = estate->itemParam;
240         std::vector<const double*> &itemSpec = estate->itemSpec;
241         int maxDims = estate->maxDims;
242         const int totalOutcomes = estate->totalOutcomes;
243
244         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
245
246         double ll = 0;
247         if (customPrior) {
248                 omxRecompute(customPrior);
249                 ll = customPrior->data[0];
250                 // need deriv adjustment TODO
251         }
252
253         if (!isfinite(ll)) {
254                 omxPrint(itemParam, "item param");
255                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
256         }
257
258 #pragma omp parallel for num_threads(Global->numThreads)
259         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
260                 int quad[maxDims];
261                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
262                 double *weight = estate->expected + qx * totalOutcomes;
263                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
264                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want, myDeriv);
265                 
266 #pragma omp atomic
267                 ll += thr_ll;
268         }
269
270         if (gradient) {
271                 double *deriv0 = thrDeriv;
272
273                 int perThread = itemParam->cols * state->itemDerivPadSize;
274                 for (int th=1; th < Global->numThreads; th++) {
275                         double *thrD = thrDeriv + th * perThread;
276                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
277                 }
278
279                 int numItems = itemParam->cols;
280                 for (int ix=0; ix < numItems; ix++) {
281                         const double *spec = itemSpec[ix];
282                         int id = spec[RPF_ISpecID];
283                         double *iparam = omxMatrixColumn(itemParam, ix);
284                         double *pad = deriv0 + ix * state->itemDerivPadSize;
285                         (*rpf_model[id].dLL2)(spec, iparam, pad);
286                 }
287
288                 int numFreeParams = int(state->numItemParam);
289                 int numParams = itemParam->cols * state->itemDerivPadSize;
290                 for (int ox=0; ox < numParams; ox++) {
291                         int to = state->paramMap[ox];
292                         if (to == -1) continue;
293
294                         // Need to check because this can happen if
295                         // lbounds/ubounds are not set appropriately.
296                         if (0 && !isfinite(deriv0[ox])) {
297                                 int item = ox / itemParam->rows;
298                                 mxLog("item parameters:\n");
299                                 const double *spec = itemSpec[item];
300                                 int id = spec[RPF_ISpecID];
301                                 int numParam = (*rpf_model[id].numParam)(spec);
302                                 double *iparam = omxMatrixColumn(itemParam, item);
303                                 pda(iparam, numParam, 1);
304                                 // Perhaps bounds can be pulled in from librpf? TODO
305                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
306                                       ox, item, deriv0[ox]);
307                         }
308
309                         if (to < numFreeParams) {
310                                 gradient[to] -= deriv0[ox];
311                         } else {
312                                 hessian[to - numFreeParams] -= deriv0[ox];
313                         }
314                 }
315         }
316
317         Free(thrDeriv);
318
319         return -ll;
320 }
321
322 static int
323 moveLatentDistribution(omxFitFunction *oo, FitContext *fc,
324                        double *ElatentMean, double *ElatentCov)
325 {
326         BA81FitState *state = (BA81FitState*) oo->argStruct;
327         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
328         std::vector<const double*> &itemSpec = estate->itemSpec;
329         omxMatrix *itemParam = estate->itemParam;
330         omxMatrix *design = estate->design;
331         double *tmpLatentMean = state->tmpLatentMean;
332         double *tmpLatentCov = state->tmpLatentCov;
333         int maxDims = estate->maxDims;
334         int maxAbilities = estate->maxAbilities;
335         int moveCount = 0;
336
337         int numItems = itemParam->cols;
338         for (int ix=0; ix < numItems; ix++) {
339                 const double *spec = itemSpec[ix];
340                 int id = spec[RPF_ISpecID];
341                 const double *rawDesign = omxMatrixColumn(design, ix);
342                 int idesign[design->rows];
343                 int idx = 0;
344                 for (int dx=0; dx < design->rows; dx++) {
345                         if (isfinite(rawDesign[dx])) {
346                                 idesign[idx++] = rawDesign[dx]-1;
347                         } else {
348                                 idesign[idx++] = -1;
349                         }
350                 }
351                 for (int d1=0; d1 < idx; d1++) {
352                         if (idesign[d1] == -1) {
353                                 tmpLatentMean[d1] = 0;
354                         } else {
355                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
356                         }
357                         for (int d2=0; d2 <= d1; d2++) {
358                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
359                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
360                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
361                                 } else {
362                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
363                                 }
364                         }
365                 }
366                 if (1) {  // ease debugging, make optional TODO
367                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
368                         for (int d1=0; d1 < maxDims; d1++) {
369                                 for (int d2=0; d2 < maxDims; d2++) {
370                                         if (d1 < idx && d2 < idx) continue;
371                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
372                                 }
373                         }
374                 }
375                 double *iparam = omxMatrixColumn(itemParam, ix);
376                 int *mask = state->paramMap.data() + state->itemDerivPadSize * ix;
377                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
378         }
379
380         int numFreeParams = int(fc->varGroup->vars.size());
381         for (int rx=0; rx < itemParam->rows; rx++) {
382                 for (int cx=0; cx < itemParam->cols; cx++) {
383                         int px = cx * state->itemDerivPadSize + rx;
384                         int vx = state->paramMap[px];
385                         if (vx >= 0 && vx < numFreeParams && state->paramLocations[px] == 1) {
386                                 fc->est[vx] = omxMatrixElement(itemParam, rx, cx);
387                                 ++moveCount;
388                         }
389                 }
390         }
391         return moveCount;
392 }
393
394 static void
395 schilling_bock_2005_rescale(omxFitFunction *oo, FitContext *fc)
396 {
397         BA81FitState *state = (BA81FitState*) oo->argStruct;
398         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
399         omxMatrix *cholCov = state->cholCov;
400         int maxAbilities = estate->maxAbilities;
401
402         //pda(ElatentMean, maxAbilities, 1);
403         //pda(estate->ElatentCov.data(), maxAbilities, maxAbilities);
404         //omxPrint(design, "design");
405
406         memcpy(cholCov->data, estate->ElatentCov.data(), sizeof(double) * maxAbilities * maxAbilities);
407
408         const char triangle = 'L';
409         F77_CALL(dpotrf)(&triangle, &maxAbilities, cholCov->data, &maxAbilities, &state->choleskyError);
410         if (state->choleskyError != 0) {
411                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
412                 return;
413         }
414
415         //pda(cholCov->data, maxAbilities, maxAbilities);
416
417         int moveCount = moveLatentDistribution(oo, fc, estate->ElatentMean.data(), cholCov->data);
418         if (estate->verbose) mxLog("%s: schilling-bock (%d param)", oo->matrix->name, moveCount);
419 }
420
421 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
422 {}
423
424 // can use same reorganization to avoid the gram product of where for every pattern TODO
425 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
426                            const std::vector<double> &derivCoef,
427                            double *derivOut)
428 {
429         int maxAbilities = estate->maxAbilities;
430         int maxDims = estate->maxDims;
431         int pmax = maxDims;
432         if (estate->numSpecific) pmax -= 1;
433
434         if (sgroup == 0) {
435                 int cx = 0;
436                 for (int d1=0; d1 < pmax; ++d1) {
437                         double amt1 = piece * derivCoef[d1];
438 #pragma omp atomic
439                         derivOut[d1] += amt1;
440                         for (int d2=0; d2 <= d1; ++d2) {
441                                 int to = maxAbilities + cx;
442                                 double amt2 = piece * derivCoef[maxDims + cx];
443 #pragma omp atomic
444                                 derivOut[to] += amt2;
445                                 ++cx;
446                         }
447                 }
448         }
449
450         if (estate->numSpecific) {
451                 int sdim = pmax + sgroup;
452                 double amt3 = piece * derivCoef[pmax];
453 #pragma omp atomic
454                 derivOut[sdim] += amt3;
455
456                 double amt4 = piece * derivCoef[maxDims + triangleLoc0(pmax)];
457                 int to = maxAbilities + triangleLoc0(sdim);
458 #pragma omp atomic
459                 derivOut[to] += amt4;
460         }
461 }
462
463 static double *reduceForSpecific(omxMatrix *mat, int maxDims, int sgroup)
464 {
465         double *out = Calloc(maxDims * maxDims, double);
466         for (int d1=0; d1 < maxDims-1; ++d1) {
467                 int cell = d1 * maxDims;
468                 for (int d2=0; d2 < maxDims-1; ++d2) {
469                         out[cell + d2] = omxMatrixElement(mat, d1, d2);
470                 }
471         }
472         int sloc = maxDims-1 + sgroup;
473         out[maxDims*maxDims - 1] = omxMatrixElement(mat, sloc, sloc);
474         return out;
475 }
476
477 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate,
478                           double *where, int sgroup, std::vector<double> *derivCoef)
479 {
480         omxMatrix *mean = estate->latentMeanOut;
481         omxMatrix *cov = estate->latentCovOut;
482         omxMatrix *icov = state->icov;
483         double *covData = cov->data;
484         double *icovData = icov->data;
485         int maxDims = estate->maxDims;
486         const char R='R';
487         const char L='L';
488         const char U='U';
489         const double alpha = 1;
490         const double beta = 0;
491         const int one = 1;
492
493         double *scov = NULL;
494         double *sicov = NULL;
495         if (estate->numSpecific) {
496                 scov = reduceForSpecific(cov, maxDims, sgroup);
497                 covData = scov;
498
499                 sicov = reduceForSpecific(icov, maxDims, sgroup);
500                 icovData = sicov;
501         }
502
503         std::vector<double> whereDiff(maxDims);
504         std::vector<double> whereGram(triangleLoc1(maxDims));
505         for (int d1=0; d1 < maxDims; ++d1) {
506                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
507         }
508         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
509
510         F77_CALL(dsymv)(&U, &maxDims, &alpha, icovData, &maxDims, whereDiff.data(), &one,
511                         &beta, derivCoef->data(), &one);
512
513         std::vector<double> covGrad1(maxDims * maxDims);
514         std::vector<double> covGrad2(maxDims * maxDims);
515
516         int cx=0;
517         for (int d1=0; d1 < maxDims; ++d1) {
518                 for (int d2=0; d2 <= d1; ++d2) {
519                         covGrad1[d2 * maxDims + d1] = covData[d2 * maxDims + d1] - whereGram[cx];
520                         ++cx;
521                 }
522         }
523
524         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, covGrad1.data(), &maxDims, icovData,
525                         &maxDims, &beta, covGrad2.data(), &maxDims);
526         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, icovData, &maxDims, covGrad2.data(),
527                         &maxDims, &beta, covGrad1.data(), &maxDims);
528
529         for (int d1=0; d1 < maxDims; ++d1) {
530                 covGrad1[d1 * maxDims + d1] /= 2.0;
531         }
532
533         cx = maxDims;
534         for (int d1=0; d1 < maxDims; ++d1) {
535                 int cell = d1 * maxDims;
536                 for (int d2=0; d2 <= d1; ++d2) {
537                         (*derivCoef)[cx] = -covGrad1[cell + d2];
538                         ++cx;
539                 }
540         }
541
542         Free(scov);
543         Free(sicov);
544 }
545
546 static bool latentDeriv(omxFitFunction *oo, double *gradient)
547 {
548         omxExpectation *expectation = oo->expectation;
549         BA81FitState *state = (BA81FitState*) oo->argStruct;
550         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
551         if (estate->verbose) mxLog("%s: latentDeriv", oo->matrix->name);
552
553         int numUnique = estate->numUnique;
554         int numSpecific = estate->numSpecific;
555         int maxDims = estate->maxDims;
556         int maxAbilities = estate->maxAbilities;
557         int primaryDims = maxDims;
558         omxMatrix *cov = estate->latentCovOut;
559         int *numIdentical = estate->numIdentical;
560         double *patternLik = estate->patternLik;
561
562         OMXZERO(patternLik, numUnique);
563
564         omxCopyMatrix(state->icov, cov);
565
566         int info;
567         omxDPOTRF(state->icov, &info);
568         if (info != 0) {
569                 if (info < 0) error("dpotrf invalid argument %d", -info);
570                 return FALSE;
571         }
572         omxDPOTRI(state->icov, &info);
573         if (info != 0) {
574                 if (info < 0) error("dpotri invalid argument %d", -info);
575                 return FALSE;
576         }
577         // fill in rest from upper triangle
578         for (int rx=1; rx < maxAbilities; ++rx) {
579                 for (int cx=0; cx < rx; ++cx) {
580                         omxSetMatrixElement(state->icov, rx, cx, omxMatrixElement(state->icov, cx, rx));
581                 }
582         }
583
584         int maxDerivCoef = maxDims + triangleLoc1(maxDims);
585         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
586         double *uniqueDeriv = Calloc(numUnique * numLatents, double);
587
588         if (numSpecific == 0) {
589 #pragma omp parallel for num_threads(Global->numThreads)
590                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
591                         const int thrId = omx_absolute_thread_num();
592                         int quad[maxDims];
593                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
594                         double where[maxDims];
595                         pointToWhere(estate, quad, where, maxDims);
596                         std::vector<double> derivCoef(maxDerivCoef);
597                         calcDerivCoef(state, estate, where, 0, &derivCoef);
598                         double area = estate->priQarea[qx];
599                         double *lxk = ba81LikelihoodFast(expectation, thrId, 0, quad);
600
601                         for (int px=0; px < numUnique; px++) {
602                                 double tmp = (lxk[px] * area);
603 #pragma omp atomic
604                                 patternLik[px] += tmp;
605                                 mapLatentDeriv(state, estate, 0, tmp, derivCoef,
606                                                uniqueDeriv + px * numLatents);
607                         }
608                 }
609         } else {
610                 primaryDims -= 1;
611                 int sDim = primaryDims;
612                 long specificPoints = estate->quadGridSize;
613
614 #pragma omp parallel for num_threads(Global->numThreads)
615                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
616                         const int thrId = omx_absolute_thread_num();
617                         int quad[maxDims];
618                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
619
620                         cai2010(expectation, thrId, FALSE, quad);
621                         double *allElxk = eBase(estate, thrId);
622                         double *Eslxk = esBase(estate, thrId);
623
624                         for (long sx=0; sx < specificPoints; sx++) {
625                                 quad[sDim] = sx;
626                                 double where[maxDims];
627                                 pointToWhere(estate, quad, where, maxDims);
628                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
629                                         std::vector<double> derivCoef(maxDerivCoef);
630                                         calcDerivCoef(state, estate, where, sgroup, &derivCoef);
631                                         double area = areaProduct(estate, quad, sgroup);
632                                         double *lxk = ba81LikelihoodFast(expectation, thrId, sgroup, quad);
633                                         for (int px=0; px < numUnique; px++) {
634                                                 double Ei = allElxk[px];
635                                                 double Eis = Eslxk[sgroup * numUnique + px];
636                                                 double tmp = ((Ei / Eis) * lxk[px] * area);
637                                                 mapLatentDeriv(state, estate, sgroup, tmp, derivCoef,
638                                                                uniqueDeriv + px * numLatents);
639                                         }
640                                 }
641                         }
642
643                         double priArea = estate->priQarea[qx];
644                         for (int px=0; px < numUnique; px++) {
645                                 double Ei = allElxk[px];
646                                 double tmp = (Ei * priArea);
647 #pragma omp atomic
648                                 patternLik[px] += tmp;
649                         }
650                 }
651         }
652
653         /*
654         std::vector<double> hess1(triangleLoc1(numLatents));
655         std::vector<double> hessSum(triangleLoc1(numLatents));
656
657         // could run nicely in parallel with numUnique * triangleLoc(numLatents) buffer
658         for (int px=0; px < numUnique; ++px) {
659                 gramProduct(uniqueDeriv + px * numLatents, numLatents, hess1.data());
660                 double dups = numIdentical[px];
661                 for (int rx=0; rx < triangleLoc1(numLatents); ++rx) {
662                         hessSum[rx] += hess1[rx] * dups;
663                 }
664         }
665         */
666
667 #pragma omp parallel for num_threads(Global->numThreads)
668         for (int px=0; px < numUnique; ++px) {
669                 double weight = numIdentical[px] / patternLik[px];
670                 for (int rx=0; rx < numLatents; ++rx) {
671                         uniqueDeriv[px * numLatents + rx] *= weight;
672                 }
673         }
674
675 #pragma omp parallel for num_threads(Global->numThreads)
676         for (int rx=0; rx < numLatents; ++rx) {
677                 for (int px=1; px < numUnique; ++px) {
678                         uniqueDeriv[rx] += uniqueDeriv[px * numLatents + rx];
679                 }
680         }
681
682         for (int l1=0; l1 < numLatents; ++l1) {
683                 int t1 = state->latentMap[l1];
684                 if (t1 < 0) continue;
685                 gradient[t1] -= 2 * uniqueDeriv[l1];
686
687                 /*
688                 for (int l2=0; l2 <= l1; ++l2) {
689                         int t2 = state->latentMap[l2];
690                         if (t2 < 0) continue;
691                         hessian[numLatents * t1 + t2] -= 2 * hessSum[triangleLoc1(l1) + l2];
692                 }
693                 */
694         }
695
696         Free(uniqueDeriv);
697
698         return TRUE;
699 }
700
701 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
702 {
703         BA81FitState *state = (BA81FitState*) oo->argStruct;
704         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
705         std::vector<int> &latentMap = state->latentMap;
706         std::vector<double> &ElatentMean = estate->ElatentMean;
707         std::vector<double> &ElatentCov = estate->ElatentCov;
708         int maxAbilities = estate->maxAbilities;
709
710         for (int a1 = 0; a1 < maxAbilities; ++a1) {
711                 if (latentMap[a1] >= 0) {
712                         int to = latentMap[a1];
713                         fc->est[to] = ElatentMean[a1];
714                 }
715
716                 for (int a2 = 0; a2 <= a1; ++a2) {
717                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
718                         if (to < 0) continue;
719                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
720                 }
721         }
722
723         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
724 }
725
726 static double
727 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
728 {
729         BA81FitState *state = (BA81FitState*) oo->argStruct;
730         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
731
732         ++state->fitCount;
733
734         if (want & FF_COMPUTE_POSTOPTIMIZE) return 0;
735
736         if (estate->type == EXPECTATION_AUGMENTED) {
737                 if (!state->haveItemMap) buildItemParamMap(oo, fc);
738
739                 if (want & FF_COMPUTE_PREOPTIMIZE) {
740                         schilling_bock_2005_rescale(oo, fc); // how does this work in multigroup? TODO
741                         return 0;
742                 }
743
744                 if (want & FF_COMPUTE_GRADIENT) ++state->gradientCount;
745
746                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
747                 double got = ba81ComputeMFit1(oo, want, fc->grad, fc->hess);
748                 return got;
749         } else if (estate->type == EXPECTATION_OBSERVED) {
750                 if (!state->haveLatentMap) buildLatentParamMap(oo, fc);
751
752                 if (want & FF_COMPUTE_PREOPTIMIZE) {
753                         setLatentStartingValues(oo, fc);
754                         return 0;
755                 }
756
757                 omxExpectationCompute(oo->expectation, NULL);
758
759                 if (want & FF_COMPUTE_GRADIENT) {
760                         if (!latentDeriv(oo, fc->grad)) {
761                                 return INFINITY;
762                         }
763                 }
764
765                 if (want & FF_COMPUTE_HESSIAN) {
766                         warning("%s: Hessian is not available for latent distribution parameters", oo->matrix->name);
767                 }
768
769                 if (want & FF_COMPUTE_FIT) {
770                         if (estate->verbose) mxLog("%s: fit", oo->matrix->name);
771                         double *patternLik = estate->patternLik;
772                         int *numIdentical = estate->numIdentical;
773                         int numUnique = estate->numUnique;
774                         double got = 0;
775 #pragma omp parallel for num_threads(Global->numThreads) schedule(static,64) reduction(+:got)
776                         for (int ux=0; ux < numUnique; ux++) {
777                                 got += numIdentical[ux] * log(patternLik[ux]);
778                         }
779                         //mxLog("fit %.2f", -2 * got);
780                         return -2 * got;
781                 }
782
783                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
784
785                 return 0;
786         } else {
787                 error("Confused");
788         }
789 }
790
791 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
792 {
793         if (!want) return;
794         double got = ba81ComputeFit(oo, want, fc);
795         if (got) oo->matrix->data[0] = got;
796 }
797
798 BA81FitState::~BA81FitState()
799 {
800         Free(tmpLatentMean);
801         Free(tmpLatentCov);
802         omxFreeAllMatrixData(icov);
803         omxFreeAllMatrixData(cholCov);
804 }
805
806 static void ba81Destroy(omxFitFunction *oo) {
807         BA81FitState *state = (BA81FitState *) oo->argStruct;
808         delete state;
809 }
810
811 void omxInitFitFunctionBA81(omxFitFunction* oo)
812 {
813         if (!oo->argStruct) { // ugh!
814                 BA81FitState *state = new BA81FitState;
815                 oo->argStruct = state;
816         }
817
818         BA81FitState *state = (BA81FitState*) oo->argStruct;
819
820         omxExpectation *expectation = oo->expectation;
821         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
822
823         //newObj->data = oo->expectation->data;
824
825         oo->computeFun = ba81Compute;
826         oo->setVarGroup = ba81SetFreeVarGroup;
827         oo->destructFun = ba81Destroy;
828         oo->gradientAvailable = TRUE;
829         oo->hessianAvailable = TRUE;
830
831         int maxParam = estate->itemParam->rows;
832         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
833
834         int maxAbilities = estate->maxAbilities;
835
836         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
837         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
838
839         int numItems = estate->itemParam->cols;
840         for (int ix=0; ix < numItems; ix++) {
841                 const double *spec = estate->itemSpec[ix];
842                 int id = spec[RPF_ISpecID];
843                 if (id < 0 || id >= rpf_numModels) {
844                         error("ItemSpec %d has unknown item model %d", ix, id);
845                 }
846         }
847
848         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
849         state->cholCov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
850 }