Fix Hessian to work with equality constraints
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 struct BA81FitState {
24
25         bool haveLatentMap;
26         std::vector<int> latentMap;
27
28         bool haveItemMap;
29         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
30         std::vector<int> paramMap;            // itemParam->cols * itemDerivPadSize -> index of free parameter
31         std::vector<size_t> paramLocations;   // itemParam->cols * itemDerivPadSize -> # of locations
32
33         omxMatrix *cholCov;
34         int choleskyError;
35         double *tmpLatentMean;    // maxDims
36         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
37         omxMatrix *icov;          // inverse covariance matrix
38
39         std::vector< FreeVarGroup* > varGroups;
40         size_t numItemParam;
41
42         BA81FitState();
43         ~BA81FitState();
44 };
45
46 BA81FitState::BA81FitState()
47 {
48         tmpLatentMean = NULL;
49         tmpLatentCov = NULL;
50         haveItemMap = false;
51         haveLatentMap = false;
52 }
53
54 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
55 {
56         FreeVarGroup *fvg = fc->varGroup;
57         BA81FitState *state = (BA81FitState *) oo->argStruct;
58         std::vector<int> &latentMap = state->latentMap;
59         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
60         int meanNum = estate->latentMeanOut->matrixNumber;
61         int covNum = estate->latentCovOut->matrixNumber;
62         int maxAbilities = estate->maxAbilities;
63         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
64
65         latentMap.assign(numLatents, -1);
66
67         int numParam = int(fvg->vars.size());
68         for (int px=0; px < numParam; px++) {
69                 omxFreeVar *fv = fvg->vars[px];
70                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
71                         omxFreeVarLocation *loc = &fv->locations[lx];
72                         int matNum = ~loc->matrix;
73                         if (matNum == meanNum) {
74                                 latentMap[loc->row + loc->col] = px;
75                         } else if (matNum == covNum) {
76                                 int a1 = loc->row;
77                                 int a2 = loc->col;
78                                 if (a1 < a2) std::swap(a1, a2);
79                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
80                                 if (latentMap[cell] == -1)
81                                         latentMap[cell] = px;
82                                 else if (latentMap[cell] != px) {
83                                         // doesn't work for multigroup constraints TODO
84                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
85                                               fvg->vars[latentMap[cell]]->name, fv->name);
86                                 }
87                                 if (a1 == a2 && fv->lbound == NEG_INF) {
88                                         fv->lbound = 1e-6;  // variance must be positive
89                                         if (fc->est[px] < fv->lbound) {
90                                                 error("Starting value for variance %s is negative", fv->name);
91                                         }
92                                 }
93                         }
94                 }
95         }
96         state->haveLatentMap = TRUE;
97 }
98
99 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
100 {
101         FreeVarGroup *fvg = fc->varGroup;
102         BA81FitState *state = (BA81FitState *) oo->argStruct;
103         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
104         omxMatrix *itemParam = estate->itemParam;
105         int size = itemParam->cols * state->itemDerivPadSize;
106         state->paramMap.assign(size, -1);  // matrix location to free param index
107         state->paramLocations.assign(size, 0);
108
109         size_t numFreeParams = state->numItemParam = fvg->vars.size();
110
111         for (size_t px=0; px < numFreeParams; px++) {
112                 omxFreeVar *fv = fvg->vars[px];
113                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
114                         omxFreeVarLocation *loc = &fv->locations[lx];
115                         int matNum = ~loc->matrix;
116                         if (matNum == itemParam->matrixNumber) {
117                                 int at = loc->col * state->itemDerivPadSize + loc->row;
118                                 state->paramMap[at] = px;
119                                 state->paramLocations[at] = fv->locations.size();
120
121                                 const double *spec = estate->itemSpec[loc->col];
122                                 int id = spec[RPF_ISpecID];
123                                 double upper, lower;
124                                 (*rpf_model[id].paramBound)(spec, loc->row, &upper, &lower);
125                                 if (fv->lbound == NEG_INF && isfinite(lower)) {
126                                         fv->lbound = lower;
127                                         if (fc->est[px] < fv->lbound) {
128                                                 error("Starting value %s %f less than lower bound %f",
129                                                       fv->name, fc->est[px], lower);
130                                         }
131                                 }
132                                 if (fv->ubound == INF && isfinite(upper)) {
133                                         fv->ubound = upper;
134                                         if (fc->est[px] > fv->ubound) {
135                                                 error("Starting value %s %f greater than upper bound %f",
136                                                       fv->name, fc->est[px], upper);
137                                         }
138                                 }
139                         }
140                 }
141         }
142
143         for (int cx=0; cx < itemParam->cols; ++cx) {
144                 const double *spec = estate->itemSpec[cx];
145                 int id = spec[RPF_ISpecID];
146                 int numParam = (*rpf_model[id].numParam)(spec);
147
148                 for (int p1=0; p1 < numParam; p1++) {
149                         int at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
150                         if (at1 < 0) continue;
151
152                         for (int p2=0; p2 <= p1; p2++) {
153                                 int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
154                                 if (at2 < 0) continue;
155
156                                 if (at1 < at2) std::swap(at1, at2);  // lower triangle
157
158                                 //mxLog("Item %d param(%d,%d) -> H[%d,%d]", cx, p1, p2, at1, at2);
159                                 int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
160                                 state->paramMap[at] = numFreeParams + at1 * numFreeParams + at2;
161                         }
162                 }
163         }
164
165         state->haveItemMap = TRUE;
166         //pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
167 }
168
169 OMXINLINE static double
170 ba81Fit1Ordinate(omxFitFunction* oo, const int *quad, const double *weight, int want, double *myDeriv)
171 {
172         BA81FitState *state = (BA81FitState*) oo->argStruct;
173         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
174         omxMatrix *itemParam = estate->itemParam;
175         int numItems = itemParam->cols;
176         int maxOutcomes = estate->maxOutcomes;
177         int maxDims = estate->maxDims;
178         int do_fit = want & FF_COMPUTE_FIT;
179         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
180
181         double where[maxDims];
182         pointToWhere(estate, quad, where, maxDims);
183
184         double *outcomeProb = NULL;
185         if (do_fit) {
186                 outcomeProb = computeRPF(estate, itemParam, quad, TRUE); // avoid malloc/free? TODO
187                 if (!outcomeProb) return 0;
188         }
189
190         double thr_ll = 0;
191         for (int ix=0; ix < numItems; ix++) {
192                 const double *spec = estate->itemSpec[ix];
193                 int id = spec[RPF_ISpecID];
194                 int iOutcomes = spec[RPF_ISpecOutcomes];
195
196                 double area = areaProduct(estate, quad, estate->Sgroup[ix]);
197                 if (do_fit) {
198                         for (int ox=0; ox < iOutcomes; ox++) {
199 #if 0
200 #pragma omp critical(ba81Fit1OrdinateDebug1)
201                                 if (!std::isfinite(outcomeProb[ix * maxOutcomes + ox])) {
202                                         pda(itemParam->data, itemParam->rows, itemParam->cols);
203                                         pda(outcomeProb, outcomes, numItems);
204                                         error("RPF produced NAs");
205                                 }
206 #endif
207                                 double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
208                                 thr_ll += got * area;
209                         }
210                 }
211
212                 if (do_deriv) {
213                         double *iparam = omxMatrixColumn(itemParam, ix);
214                         double *pad = myDeriv + ix * state->itemDerivPadSize;
215                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
216                 }
217                 weight += iOutcomes;
218         }
219
220         Free(outcomeProb);
221
222         return thr_ll;
223 }
224
225 static double
226 ba81ComputeMFit1(omxFitFunction* oo, int want, double *gradient, double *hessian)
227 {
228         BA81FitState *state = (BA81FitState*) oo->argStruct;
229         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
230         if (estate->verbose) mxLog("%s: fit-%d", oo->matrix->name, want);
231
232         omxMatrix *customPrior = estate->customPrior;
233         omxMatrix *itemParam = estate->itemParam;
234         std::vector<const double*> &itemSpec = estate->itemSpec;
235         int maxDims = estate->maxDims;
236         const int totalOutcomes = estate->totalOutcomes;
237
238         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
239
240         double ll = 0;
241         if (customPrior) {
242                 omxRecompute(customPrior);
243                 ll = customPrior->data[0];
244                 // need deriv adjustment TODO
245         }
246
247         if (!isfinite(ll)) {
248                 omxPrint(itemParam, "item param");
249                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
250         }
251
252 #pragma omp parallel for num_threads(Global->numThreads)
253         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
254                 int quad[maxDims];
255                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
256                 double *weight = estate->expected + qx * totalOutcomes;
257                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
258                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want, myDeriv);
259                 
260 #pragma omp atomic
261                 ll += thr_ll;
262         }
263
264         if (gradient) {
265                 double *deriv0 = thrDeriv;
266
267                 int perThread = itemParam->cols * state->itemDerivPadSize;
268                 for (int th=1; th < Global->numThreads; th++) {
269                         double *thrD = thrDeriv + th * perThread;
270                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
271                 }
272
273                 int numItems = itemParam->cols;
274                 for (int ix=0; ix < numItems; ix++) {
275                         const double *spec = itemSpec[ix];
276                         int id = spec[RPF_ISpecID];
277                         double *iparam = omxMatrixColumn(itemParam, ix);
278                         double *pad = deriv0 + ix * state->itemDerivPadSize;
279                         (*rpf_model[id].dLL2)(spec, iparam, pad);
280                 }
281
282                 int numFreeParams = int(state->numItemParam);
283                 int numParams = itemParam->cols * state->itemDerivPadSize;
284                 for (int ox=0; ox < numParams; ox++) {
285                         int to = state->paramMap[ox];
286                         if (to == -1) continue;
287
288                         // Need to check because this can happen if
289                         // lbounds/ubounds are not set appropriately.
290                         if (0 && !isfinite(deriv0[ox])) {
291                                 int item = ox / itemParam->rows;
292                                 mxLog("item parameters:\n");
293                                 const double *spec = itemSpec[item];
294                                 int id = spec[RPF_ISpecID];
295                                 int numParam = (*rpf_model[id].numParam)(spec);
296                                 double *iparam = omxMatrixColumn(itemParam, item);
297                                 pda(iparam, numParam, 1);
298                                 // Perhaps bounds can be pulled in from librpf? TODO
299                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
300                                       ox, item, deriv0[ox]);
301                         }
302
303                         if (to < numFreeParams) {
304                                 gradient[to] -= deriv0[ox];
305                         } else {
306                                 int Hto = to - numFreeParams;
307                                 hessian[Hto] -= deriv0[ox];
308                         }
309                 }
310         }
311
312         Free(thrDeriv);
313
314         return -ll;
315 }
316
317 static int
318 moveLatentDistribution(omxFitFunction *oo, FitContext *fc,
319                        double *ElatentMean, double *ElatentCov)
320 {
321         BA81FitState *state = (BA81FitState*) oo->argStruct;
322         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
323         std::vector<const double*> &itemSpec = estate->itemSpec;
324         omxMatrix *itemParam = estate->itemParam;
325         omxMatrix *design = estate->design;
326         double *tmpLatentMean = state->tmpLatentMean;
327         double *tmpLatentCov = state->tmpLatentCov;
328         int maxDims = estate->maxDims;
329         int maxAbilities = estate->maxAbilities;
330         int moveCount = 0;
331
332         int numItems = itemParam->cols;
333         for (int ix=0; ix < numItems; ix++) {
334                 const double *spec = itemSpec[ix];
335                 int id = spec[RPF_ISpecID];
336                 const double *rawDesign = omxMatrixColumn(design, ix);
337                 int idesign[design->rows];
338                 int idx = 0;
339                 for (int dx=0; dx < design->rows; dx++) {
340                         if (isfinite(rawDesign[dx])) {
341                                 idesign[idx++] = rawDesign[dx]-1;
342                         } else {
343                                 idesign[idx++] = -1;
344                         }
345                 }
346                 for (int d1=0; d1 < idx; d1++) {
347                         if (idesign[d1] == -1) {
348                                 tmpLatentMean[d1] = 0;
349                         } else {
350                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
351                         }
352                         for (int d2=0; d2 <= d1; d2++) {
353                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
354                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
355                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
356                                 } else {
357                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
358                                 }
359                         }
360                 }
361                 if (1) {  // ease debugging, make optional TODO
362                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
363                         for (int d1=0; d1 < maxDims; d1++) {
364                                 for (int d2=0; d2 < maxDims; d2++) {
365                                         if (d1 < idx && d2 < idx) continue;
366                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
367                                 }
368                         }
369                 }
370                 double *iparam = omxMatrixColumn(itemParam, ix);
371                 int *mask = state->paramMap.data() + state->itemDerivPadSize * ix;
372                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
373         }
374
375         int numFreeParams = int(fc->varGroup->vars.size());
376         for (int rx=0; rx < itemParam->rows; rx++) {
377                 for (int cx=0; cx < itemParam->cols; cx++) {
378                         int px = cx * state->itemDerivPadSize + rx;
379                         int vx = state->paramMap[px];
380                         if (vx >= 0 && vx < numFreeParams && state->paramLocations[px] == 1) {
381                                 fc->est[vx] = omxMatrixElement(itemParam, rx, cx);
382                                 ++moveCount;
383                         }
384                 }
385         }
386         return moveCount;
387 }
388
389 static void
390 schilling_bock_2005_rescale(omxFitFunction *oo, FitContext *fc)
391 {
392         BA81FitState *state = (BA81FitState*) oo->argStruct;
393         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
394         omxMatrix *cholCov = state->cholCov;
395         int maxAbilities = estate->maxAbilities;
396
397         //pda(ElatentMean, maxAbilities, 1);
398         //pda(estate->ElatentCov.data(), maxAbilities, maxAbilities);
399         //omxPrint(design, "design");
400
401         memcpy(cholCov->data, estate->ElatentCov.data(), sizeof(double) * maxAbilities * maxAbilities);
402
403         const char triangle = 'L';
404         F77_CALL(dpotrf)(&triangle, &maxAbilities, cholCov->data, &maxAbilities, &state->choleskyError);
405         if (state->choleskyError != 0) {
406                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
407                 return;
408         }
409
410         //pda(cholCov->data, maxAbilities, maxAbilities);
411
412         int moveCount = moveLatentDistribution(oo, fc, estate->ElatentMean.data(), cholCov->data);
413         if (estate->verbose) mxLog("%s: schilling-bock (%d param)", oo->matrix->name, moveCount);
414 }
415
416 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
417 {}
418
419 // can use same reorganization to avoid the gram product of where for every pattern TODO
420 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
421                            const std::vector<double> &derivCoef,
422                            double *derivOut)
423 {
424         int maxAbilities = estate->maxAbilities;
425         int maxDims = estate->maxDims;
426         int pmax = maxDims;
427         if (estate->numSpecific) pmax -= 1;
428
429         if (sgroup == 0) {
430                 int cx = 0;
431                 for (int d1=0; d1 < pmax; ++d1) {
432                         double amt1 = piece * derivCoef[d1];
433 #pragma omp atomic
434                         derivOut[d1] += amt1;
435                         for (int d2=0; d2 <= d1; ++d2) {
436                                 int to = maxAbilities + cx;
437                                 double amt2 = piece * derivCoef[maxDims + cx];
438 #pragma omp atomic
439                                 derivOut[to] += amt2;
440                                 ++cx;
441                         }
442                 }
443         }
444
445         if (estate->numSpecific) {
446                 int sdim = pmax + sgroup;
447                 double amt3 = piece * derivCoef[pmax];
448 #pragma omp atomic
449                 derivOut[sdim] += amt3;
450
451                 double amt4 = piece * derivCoef[maxDims + triangleLoc0(pmax)];
452                 int to = maxAbilities + triangleLoc0(sdim);
453 #pragma omp atomic
454                 derivOut[to] += amt4;
455         }
456 }
457
458 static double *reduceForSpecific(omxMatrix *mat, int maxDims, int sgroup)
459 {
460         double *out = Calloc(maxDims * maxDims, double);
461         for (int d1=0; d1 < maxDims-1; ++d1) {
462                 int cell = d1 * maxDims;
463                 for (int d2=0; d2 < maxDims-1; ++d2) {
464                         out[cell + d2] = omxMatrixElement(mat, d1, d2);
465                 }
466         }
467         int sloc = maxDims-1 + sgroup;
468         out[maxDims*maxDims - 1] = omxMatrixElement(mat, sloc, sloc);
469         return out;
470 }
471
472 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate,
473                           double *where, int sgroup, std::vector<double> *derivCoef)
474 {
475         omxMatrix *mean = estate->latentMeanOut;
476         omxMatrix *cov = estate->latentCovOut;
477         omxMatrix *icov = state->icov;
478         double *covData = cov->data;
479         double *icovData = icov->data;
480         int maxDims = estate->maxDims;
481         const char R='R';
482         const char L='L';
483         const char U='U';
484         const double alpha = 1;
485         const double beta = 0;
486         const int one = 1;
487
488         double *scov = NULL;
489         double *sicov = NULL;
490         if (estate->numSpecific) {
491                 scov = reduceForSpecific(cov, maxDims, sgroup);
492                 covData = scov;
493
494                 sicov = reduceForSpecific(icov, maxDims, sgroup);
495                 icovData = sicov;
496         }
497
498         std::vector<double> whereDiff(maxDims);
499         std::vector<double> whereGram(triangleLoc1(maxDims));
500         for (int d1=0; d1 < maxDims; ++d1) {
501                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
502         }
503         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
504
505         F77_CALL(dsymv)(&U, &maxDims, &alpha, icovData, &maxDims, whereDiff.data(), &one,
506                         &beta, derivCoef->data(), &one);
507
508         std::vector<double> covGrad1(maxDims * maxDims);
509         std::vector<double> covGrad2(maxDims * maxDims);
510
511         int cx=0;
512         for (int d1=0; d1 < maxDims; ++d1) {
513                 for (int d2=0; d2 <= d1; ++d2) {
514                         covGrad1[d2 * maxDims + d1] = covData[d2 * maxDims + d1] - whereGram[cx];
515                         ++cx;
516                 }
517         }
518
519         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, covGrad1.data(), &maxDims, icovData,
520                         &maxDims, &beta, covGrad2.data(), &maxDims);
521         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, icovData, &maxDims, covGrad2.data(),
522                         &maxDims, &beta, covGrad1.data(), &maxDims);
523
524         for (int d1=0; d1 < maxDims; ++d1) {
525                 covGrad1[d1 * maxDims + d1] /= 2.0;
526         }
527
528         cx = maxDims;
529         for (int d1=0; d1 < maxDims; ++d1) {
530                 int cell = d1 * maxDims;
531                 for (int d2=0; d2 <= d1; ++d2) {
532                         (*derivCoef)[cx] = -covGrad1[cell + d2];
533                         ++cx;
534                 }
535         }
536
537         Free(scov);
538         Free(sicov);
539 }
540
541 static bool latentDeriv(omxFitFunction *oo, double *gradient)
542 {
543         omxExpectation *expectation = oo->expectation;
544         BA81FitState *state = (BA81FitState*) oo->argStruct;
545         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
546         if (estate->verbose) mxLog("%s: latentDeriv", oo->matrix->name);
547
548         int numUnique = estate->numUnique;
549         int numSpecific = estate->numSpecific;
550         int maxDims = estate->maxDims;
551         int maxAbilities = estate->maxAbilities;
552         int primaryDims = maxDims;
553         omxMatrix *cov = estate->latentCovOut;
554         int *numIdentical = estate->numIdentical;
555         double *patternLik = estate->patternLik;
556
557         OMXZERO(patternLik, numUnique);
558
559         omxCopyMatrix(state->icov, cov);
560
561         int info;
562         omxDPOTRF(state->icov, &info);
563         if (info != 0) {
564                 if (info < 0) error("dpotrf invalid argument %d", -info);
565                 return FALSE;
566         }
567         omxDPOTRI(state->icov, &info);
568         if (info != 0) {
569                 if (info < 0) error("dpotri invalid argument %d", -info);
570                 return FALSE;
571         }
572         // fill in rest from upper triangle
573         for (int rx=1; rx < maxAbilities; ++rx) {
574                 for (int cx=0; cx < rx; ++cx) {
575                         omxSetMatrixElement(state->icov, rx, cx, omxMatrixElement(state->icov, cx, rx));
576                 }
577         }
578
579         int maxDerivCoef = maxDims + triangleLoc1(maxDims);
580         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
581         double *uniqueDeriv = Calloc(numUnique * numLatents, double);
582
583         if (numSpecific == 0) {
584 #pragma omp parallel for num_threads(Global->numThreads)
585                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
586                         const int thrId = omx_absolute_thread_num();
587                         int quad[maxDims];
588                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
589                         double where[maxDims];
590                         pointToWhere(estate, quad, where, maxDims);
591                         std::vector<double> derivCoef(maxDerivCoef);
592                         calcDerivCoef(state, estate, where, 0, &derivCoef);
593                         double area = estate->priQarea[qx];
594                         double *lxk = ba81LikelihoodFast(expectation, thrId, 0, quad);
595
596                         for (int px=0; px < numUnique; px++) {
597                                 double tmp = (lxk[px] * area);
598 #pragma omp atomic
599                                 patternLik[px] += tmp;
600                                 mapLatentDeriv(state, estate, 0, tmp, derivCoef,
601                                                uniqueDeriv + px * numLatents);
602                         }
603                 }
604         } else {
605                 primaryDims -= 1;
606                 int sDim = primaryDims;
607                 long specificPoints = estate->quadGridSize;
608
609 #pragma omp parallel for num_threads(Global->numThreads)
610                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
611                         const int thrId = omx_absolute_thread_num();
612                         int quad[maxDims];
613                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
614
615                         cai2010(expectation, thrId, FALSE, quad);
616                         double *allElxk = eBase(estate, thrId);
617                         double *Eslxk = esBase(estate, thrId);
618
619                         for (long sx=0; sx < specificPoints; sx++) {
620                                 quad[sDim] = sx;
621                                 double where[maxDims];
622                                 pointToWhere(estate, quad, where, maxDims);
623                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
624                                         std::vector<double> derivCoef(maxDerivCoef);
625                                         calcDerivCoef(state, estate, where, sgroup, &derivCoef);
626                                         double area = areaProduct(estate, quad, sgroup);
627                                         double *lxk = ba81LikelihoodFast(expectation, thrId, sgroup, quad);
628                                         for (int px=0; px < numUnique; px++) {
629                                                 double Ei = allElxk[px];
630                                                 double Eis = Eslxk[sgroup * numUnique + px];
631                                                 double tmp = ((Ei / Eis) * lxk[px] * area);
632                                                 mapLatentDeriv(state, estate, sgroup, tmp, derivCoef,
633                                                                uniqueDeriv + px * numLatents);
634                                         }
635                                 }
636                         }
637
638                         double priArea = estate->priQarea[qx];
639                         for (int px=0; px < numUnique; px++) {
640                                 double Ei = allElxk[px];
641                                 double tmp = (Ei * priArea);
642 #pragma omp atomic
643                                 patternLik[px] += tmp;
644                         }
645                 }
646         }
647
648         /*
649         std::vector<double> hess1(triangleLoc1(numLatents));
650         std::vector<double> hessSum(triangleLoc1(numLatents));
651
652         // could run nicely in parallel with numUnique * triangleLoc(numLatents) buffer
653         for (int px=0; px < numUnique; ++px) {
654                 gramProduct(uniqueDeriv + px * numLatents, numLatents, hess1.data());
655                 double dups = numIdentical[px];
656                 for (int rx=0; rx < triangleLoc1(numLatents); ++rx) {
657                         hessSum[rx] += hess1[rx] * dups;
658                 }
659         }
660         */
661
662 #pragma omp parallel for num_threads(Global->numThreads)
663         for (int px=0; px < numUnique; ++px) {
664                 double weight = numIdentical[px] / patternLik[px];
665                 for (int rx=0; rx < numLatents; ++rx) {
666                         uniqueDeriv[px * numLatents + rx] *= weight;
667                 }
668         }
669
670 #pragma omp parallel for num_threads(Global->numThreads)
671         for (int rx=0; rx < numLatents; ++rx) {
672                 for (int px=1; px < numUnique; ++px) {
673                         uniqueDeriv[rx] += uniqueDeriv[px * numLatents + rx];
674                 }
675         }
676
677         for (int l1=0; l1 < numLatents; ++l1) {
678                 int t1 = state->latentMap[l1];
679                 if (t1 < 0) continue;
680                 gradient[t1] -= 2 * uniqueDeriv[l1];
681
682                 /*
683                 for (int l2=0; l2 <= l1; ++l2) {
684                         int t2 = state->latentMap[l2];
685                         if (t2 < 0) continue;
686                         hessian[numLatents * t1 + t2] -= 2 * hessSum[triangleLoc1(l1) + l2];
687                 }
688                 */
689         }
690
691         Free(uniqueDeriv);
692
693         return TRUE;
694 }
695
696 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
697 {
698         BA81FitState *state = (BA81FitState*) oo->argStruct;
699         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
700         std::vector<int> &latentMap = state->latentMap;
701         std::vector<double> &ElatentMean = estate->ElatentMean;
702         std::vector<double> &ElatentCov = estate->ElatentCov;
703         int maxAbilities = estate->maxAbilities;
704
705         for (int a1 = 0; a1 < maxAbilities; ++a1) {
706                 if (latentMap[a1] >= 0) {
707                         int to = latentMap[a1];
708                         fc->est[to] = ElatentMean[a1];
709                 }
710
711                 for (int a2 = 0; a2 <= a1; ++a2) {
712                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
713                         if (to < 0) continue;
714                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
715                 }
716         }
717
718         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
719 }
720
721 static double
722 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
723 {
724         BA81FitState *state = (BA81FitState*) oo->argStruct;
725         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
726
727         if (want & FF_COMPUTE_POSTOPTIMIZE) return 0;
728
729         if (estate->type == EXPECTATION_AUGMENTED) {
730                 if (!state->haveItemMap) buildItemParamMap(oo, fc);
731
732                 if (want & FF_COMPUTE_PREOPTIMIZE) {
733                         schilling_bock_2005_rescale(oo, fc); // how does this work in multigroup? TODO
734                         return 0;
735                 }
736
737                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
738                 double got = ba81ComputeMFit1(oo, want, fc->grad, fc->hess);
739                 return got;
740         } else if (estate->type == EXPECTATION_OBSERVED) {
741                 if (!state->haveLatentMap) buildLatentParamMap(oo, fc);
742
743                 if (want & FF_COMPUTE_PREOPTIMIZE) {
744                         setLatentStartingValues(oo, fc);
745                         return 0;
746                 }
747
748                 omxExpectationCompute(oo->expectation, NULL);
749
750                 if (want & FF_COMPUTE_GRADIENT) {
751                         if (!latentDeriv(oo, fc->grad)) {
752                                 return INFINITY;
753                         }
754                 }
755
756                 if (want & FF_COMPUTE_HESSIAN) {
757                         warning("%s: Hessian is not available for latent distribution parameters", oo->matrix->name);
758                 }
759
760                 if (want & FF_COMPUTE_FIT) {
761                         if (estate->verbose) mxLog("%s: fit", oo->matrix->name);
762                         double *patternLik = estate->patternLik;
763                         int *numIdentical = estate->numIdentical;
764                         int numUnique = estate->numUnique;
765                         double got = 0;
766 #pragma omp parallel for num_threads(Global->numThreads) schedule(static,64) reduction(+:got)
767                         for (int ux=0; ux < numUnique; ux++) {
768                                 got += numIdentical[ux] * log(patternLik[ux]);
769                         }
770                         //mxLog("fit %.2f", -2 * got);
771                         return -2 * got;
772                 }
773
774                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
775
776                 return 0;
777         } else {
778                 error("Confused");
779         }
780 }
781
782 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
783 {
784         if (!want) return;
785         double got = ba81ComputeFit(oo, want, fc);
786         if (got) oo->matrix->data[0] = got;
787 }
788
789 BA81FitState::~BA81FitState()
790 {
791         Free(tmpLatentMean);
792         Free(tmpLatentCov);
793         omxFreeAllMatrixData(icov);
794         omxFreeAllMatrixData(cholCov);
795 }
796
797 static void ba81Destroy(omxFitFunction *oo) {
798         BA81FitState *state = (BA81FitState *) oo->argStruct;
799         delete state;
800 }
801
802 void omxInitFitFunctionBA81(omxFitFunction* oo)
803 {
804         if (!oo->argStruct) { // ugh!
805                 BA81FitState *state = new BA81FitState;
806                 oo->argStruct = state;
807         }
808
809         BA81FitState *state = (BA81FitState*) oo->argStruct;
810
811         omxExpectation *expectation = oo->expectation;
812         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
813
814         //newObj->data = oo->expectation->data;
815
816         oo->computeFun = ba81Compute;
817         oo->setVarGroup = ba81SetFreeVarGroup;
818         oo->destructFun = ba81Destroy;
819         oo->gradientAvailable = TRUE;
820         oo->hessianAvailable = TRUE;
821
822         int maxParam = estate->itemParam->rows;
823         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
824
825         int maxAbilities = estate->maxAbilities;
826
827         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
828         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
829
830         int numItems = estate->itemParam->cols;
831         for (int ix=0; ix < numItems; ix++) {
832                 const double *spec = estate->itemSpec[ix];
833                 int id = spec[RPF_ISpecID];
834                 if (id < 0 || id >= rpf_numModels) {
835                         error("ItemSpec %d has unknown item model %d", ix, id);
836                 }
837         }
838
839         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
840         state->cholCov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
841 }