Remove a few more exp()
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 static const char *NAME = "FitFunctionBA81";
24
25 struct BA81FitState {
26
27         std::vector<int> latentMap;
28         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
29         int *paramMap;            // itemParam->cols * itemDerivPadSize -> index of free parameter
30         std::vector<int> NAtriangle; // TODO remove
31         int choleskyError;
32         double *tmpLatentMean;    // maxDims
33         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
34         omxMatrix *icov;          // inverse covariance matrix
35         int fitCount;             // dubious, remove? TODO
36         int gradientCount;        // dubious, remove? TODO
37
38         std::vector< FreeVarGroup* > varGroups;
39         size_t numItemParam;
40
41         BA81FitState();
42         ~BA81FitState();
43 };
44
45 BA81FitState::BA81FitState()
46 {
47         paramMap = NULL;
48         fitCount = 0;
49         gradientCount = 0;
50         tmpLatentMean = NULL;
51         tmpLatentCov = NULL;
52 }
53
54 static void buildLatentParamMap(omxFitFunction* oo, FreeVarGroup *fvg)
55 {
56         // if no latent param, need a flag to determine whether to initialize TODO
57
58         BA81FitState *state = (BA81FitState *) oo->argStruct;
59         std::vector<int> &latentMap = state->latentMap;
60         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
61         int meanNum = estate->latentMeanOut->matrixNumber;
62         int covNum = estate->latentCovOut->matrixNumber;
63         int maxAbilities = estate->maxAbilities;
64         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
65
66         latentMap.assign(numLatents, -1);
67
68         int numParam = int(fvg->vars.size());
69         for (int px=0; px < numParam; px++) {
70                 omxFreeVar *fv = fvg->vars[px];
71                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
72                         omxFreeVarLocation *loc = &fv->locations[lx];
73                         int matNum = ~loc->matrix;
74                         if (matNum == meanNum) {
75                                 latentMap[loc->row + loc->col] = px;
76                         } else if (matNum == covNum) {
77                                 int a1 = loc->row;
78                                 int a2 = loc->col;
79                                 if (a1 < a2) std::swap(a1, a2);
80                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
81                                 if (latentMap[cell] == -1)
82                                         latentMap[cell] = px;
83                                 else if (latentMap[cell] != px) {
84                                         // doesn't work for multigroup constraints TODO
85                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
86                                               fvg->vars[latentMap[cell]]->name, fv->name);
87                                 }
88                                 if (a1 == a2 && fv->lbound == NEG_INF) {
89                                         fv->lbound = 1e-6;  // variance must be positive
90                                 }
91                         }
92                 }
93         }
94 }
95
96 static void buildItemParamMap(omxFitFunction* oo, FreeVarGroup *fvg)
97 {
98         BA81FitState *state = (BA81FitState *) oo->argStruct;
99         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
100         omxMatrix *itemParam = estate->itemParam;
101         int size = itemParam->cols * state->itemDerivPadSize;
102         state->paramMap = Realloc(NULL, size, int);  // matrix location to free param index
103         for (int px=0; px < size; px++) {
104                 state->paramMap[px] = -1;
105         }
106
107         size_t numFreeParams = state->numItemParam = fvg->vars.size();
108         int *pRow = Realloc(NULL, numFreeParams, int);
109         int *pCol = Realloc(NULL, numFreeParams, int);
110
111         for (size_t px=0; px < numFreeParams; px++) {
112                 pRow[px] = -1;
113                 pCol[px] = -1;
114                 omxFreeVar *fv = fvg->vars[px];
115                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
116                         omxFreeVarLocation *loc = &fv->locations[lx];
117                         int matNum = ~loc->matrix;
118                         if (matNum == itemParam->matrixNumber) {
119                                 pRow[px] = loc->row;
120                                 pCol[px] = loc->col;
121                                 int at = pCol[px] * state->itemDerivPadSize + pRow[px];
122                                 state->paramMap[at] = px;
123
124                                 const double *spec = estate->itemSpec[loc->col];
125                                 int id = spec[RPF_ISpecID];
126                                 double upper, lower;
127                                 (*rpf_model[id].paramBound)(spec, loc->row, &upper, &lower);
128                                 if (fv->lbound == NEG_INF && isfinite(lower)) fv->lbound = lower;
129                                 if (fv->ubound == INF && isfinite(upper)) fv->ubound = upper;
130                         }
131                 }
132         }
133
134         for (size_t p1=0; p1 < numFreeParams; p1++) {
135                 for (size_t p2=p1; p2 < numFreeParams; p2++) {
136                         if (pCol[p1] == -1 || pCol[p1] != pCol[p2]) continue;
137                         const double *spec = estate->itemSpec[pCol[p1]];
138                         int id = spec[RPF_ISpecID];
139                         int numParam = (*rpf_model[id].numParam)(spec);
140                         int r1 = pRow[p1];
141                         int r2 = pRow[p2];
142                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
143                         int rowOffset = 0;
144                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
145                         int at = pCol[p1] * state->itemDerivPadSize + numParam + rowOffset + r1;
146                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
147                         if (p2 != p1) state->NAtriangle.push_back(p2 * numFreeParams + p1);
148                 }
149         }
150
151         Free(pRow);
152         Free(pCol);
153 }
154
155 OMXINLINE static double
156 ba81Fit1Ordinate(omxFitFunction* oo, const int *quad, const double *weight, int want, double *myDeriv)
157 {
158         BA81FitState *state = (BA81FitState*) oo->argStruct;
159         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
160         omxMatrix *itemParam = estate->itemParam;
161         int numItems = itemParam->cols;
162         int maxOutcomes = estate->maxOutcomes;
163         int maxDims = estate->maxDims;
164         int do_fit = want & FF_COMPUTE_FIT;
165         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
166
167         double where[maxDims];
168         pointToWhere(estate, quad, where, maxDims);
169
170         double *outcomeProb = NULL;
171         if (do_fit) {
172                 outcomeProb = computeRPF(estate, itemParam, quad, TRUE); // avoid malloc/free? TODO
173                 if (!outcomeProb) return 0;
174         }
175
176         double thr_ll = 0;
177         for (int ix=0; ix < numItems; ix++) {
178                 const double *spec = estate->itemSpec[ix];
179                 int id = spec[RPF_ISpecID];
180                 int iOutcomes = spec[RPF_ISpecOutcomes];
181
182                 double area = areaProduct(estate, quad, estate->Sgroup[ix]);
183                 if (do_fit) {
184                         for (int ox=0; ox < iOutcomes; ox++) {
185 #if 0
186 #pragma omp critical(ba81Fit1OrdinateDebug1)
187                                 if (!std::isfinite(outcomeProb[ix * maxOutcomes + ox])) {
188                                         pda(itemParam->data, itemParam->rows, itemParam->cols);
189                                         pda(outcomeProb, outcomes, numItems);
190                                         error("RPF produced NAs");
191                                 }
192 #endif
193                                 double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
194                                 thr_ll += got * area;
195                         }
196                 }
197
198                 if (do_deriv) {
199                         double *iparam = omxMatrixColumn(itemParam, ix);
200                         double *pad = myDeriv + ix * state->itemDerivPadSize;
201                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
202                 }
203                 weight += iOutcomes;
204         }
205
206         Free(outcomeProb);
207
208         return thr_ll;
209 }
210
211 static double
212 ba81ComputeMFit1(omxFitFunction* oo, int want, double *gradient, double *hessian)
213 {
214         BA81FitState *state = (BA81FitState*) oo->argStruct;
215         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
216         omxMatrix *customPrior = estate->customPrior;
217         omxMatrix *itemParam = estate->itemParam;
218         std::vector<const double*> &itemSpec = estate->itemSpec;   // need c++11 auto here TODO
219         int maxDims = estate->maxDims;
220         const int totalOutcomes = estate->totalOutcomes;
221
222         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
223
224         double ll = 0;
225         if (customPrior) {
226                 omxRecompute(customPrior);
227                 ll = customPrior->data[0];
228                 // need deriv adjustment TODO
229         }
230
231         if (!isfinite(ll)) {
232                 omxPrint(itemParam, "item param");
233                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
234         }
235
236 #pragma omp parallel for num_threads(Global->numThreads)
237         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
238                 int quad[maxDims];
239                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
240                 double *weight = estate->expected + qx * totalOutcomes;
241                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
242                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want, myDeriv);
243                 
244 #pragma omp atomic
245                 ll += thr_ll;
246         }
247
248         if (gradient) {
249                 double *deriv0 = thrDeriv;
250
251                 int perThread = itemParam->cols * state->itemDerivPadSize;
252                 for (int th=1; th < Global->numThreads; th++) {
253                         double *thrD = thrDeriv + th * perThread;
254                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
255                 }
256
257                 int numItems = itemParam->cols;
258                 for (int ix=0; ix < numItems; ix++) {
259                         const double *spec = itemSpec[ix];
260                         int id = spec[RPF_ISpecID];
261                         double *iparam = omxMatrixColumn(itemParam, ix);
262                         double *pad = deriv0 + ix * state->itemDerivPadSize;
263                         (*rpf_model[id].dLL2)(spec, iparam, pad);
264                 }
265
266                 int numFreeParams = int(state->numItemParam);
267                 int numParams = itemParam->cols * state->itemDerivPadSize;
268                 for (int ox=0; ox < numParams; ox++) {
269                         int to = state->paramMap[ox];
270                         if (to == -1) continue;
271
272                         // Need to check because this can happen if
273                         // lbounds/ubounds are not set appropriately.
274                         if (0 && !isfinite(deriv0[ox])) {
275                                 int item = ox / itemParam->rows;
276                                 mxLog("item parameters:\n");
277                                 const double *spec = itemSpec[item];
278                                 int id = spec[RPF_ISpecID];
279                                 int numParam = (*rpf_model[id].numParam)(spec);
280                                 double *iparam = omxMatrixColumn(itemParam, item);
281                                 pda(iparam, numParam, 1);
282                                 // Perhaps bounds can be pulled in from librpf? TODO
283                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
284                                       ox, item, deriv0[ox]);
285                         }
286
287                         if (to < numFreeParams) {
288                                 gradient[to] -= deriv0[ox];
289                         } else {
290                                 hessian[to - numFreeParams] -= deriv0[ox];
291                         }
292                 }
293         }
294
295         Free(thrDeriv);
296
297         return -ll;
298 }
299
300 static void
301 moveLatentDistribution(omxFitFunction *oo, FitContext *fc,
302                        double *ElatentMean, double *ElatentCov)
303 {
304         BA81FitState *state = (BA81FitState*) oo->argStruct;
305         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
306         std::vector<const double*> &itemSpec = estate->itemSpec;   // need c++11 auto here TODO
307         omxMatrix *itemParam = estate->itemParam;
308         omxMatrix *design = estate->design;
309         double *tmpLatentMean = state->tmpLatentMean;
310         double *tmpLatentCov = state->tmpLatentCov;
311         int maxDims = estate->maxDims;
312         int maxAbilities = estate->maxAbilities;
313
314         int numItems = itemParam->cols;
315         for (int ix=0; ix < numItems; ix++) {
316                 const double *spec = itemSpec[ix];
317                 int id = spec[RPF_ISpecID];
318                 const double *rawDesign = omxMatrixColumn(design, ix);
319                 int idesign[design->rows];
320                 int idx = 0;
321                 for (int dx=0; dx < design->rows; dx++) {
322                         if (isfinite(rawDesign[dx])) {
323                                 idesign[idx++] = rawDesign[dx]-1;
324                         } else {
325                                 idesign[idx++] = -1;
326                         }
327                 }
328                 for (int d1=0; d1 < idx; d1++) {
329                         if (idesign[d1] == -1) {
330                                 tmpLatentMean[d1] = 0;
331                         } else {
332                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
333                         }
334                         for (int d2=0; d2 <= d1; d2++) {
335                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
336                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
337                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
338                                 } else {
339                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
340                                 }
341                         }
342                 }
343                 if (1) {  // ease debugging, make optional TODO
344                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
345                         for (int d1=0; d1 < maxDims; d1++) {
346                                 for (int d2=0; d2 < maxDims; d2++) {
347                                         if (d1 < idx && d2 < idx) continue;
348                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
349                                 }
350                         }
351                 }
352                 double *iparam = omxMatrixColumn(itemParam, ix);
353                 int *mask = state->paramMap + state->itemDerivPadSize * ix;
354                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
355         }
356
357         int numFreeParams = int(fc->varGroup->vars.size());
358         for (int rx=0; rx < itemParam->rows; rx++) {
359                 for (int cx=0; cx < itemParam->cols; cx++) {
360                         int vx = state->paramMap[cx * state->itemDerivPadSize + rx];
361                         if (vx >= 0 && vx < numFreeParams) {
362                                 fc->est[vx] = omxMatrixElement(itemParam, rx, cx);
363                         }
364                 }
365         }
366 }
367
368 static void
369 schilling_bock_2005_rescale(omxFitFunction *oo, FitContext *fc)
370 {
371         BA81FitState *state = (BA81FitState*) oo->argStruct;
372         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
373         double *ElatentMean = estate->ElatentMean;
374         double *ElatentCov = estate->ElatentCov;
375         int maxAbilities = estate->maxAbilities;
376
377         //mxLog("schilling bock\n");
378         //pda(ElatentMean, maxAbilities, 1);
379         //pda(ElatentCov, maxAbilities, maxAbilities);
380         //omxPrint(design, "design");
381
382         // use omxDPOTRF instead? TODO
383         const char triangle = 'L';
384         F77_CALL(dpotrf)(&triangle, &maxAbilities, ElatentCov, &maxAbilities, &state->choleskyError);
385         if (state->choleskyError != 0) {
386                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
387                 return;
388         }
389
390         moveLatentDistribution(oo, fc, ElatentMean, ElatentCov);
391 }
392
393 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
394 {}
395
396 // can use same reorganization to avoid the gram product of where for every pattern TODO
397 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
398                            const std::vector<double> &derivCoef,
399                            double *derivOut)
400 {
401         int maxAbilities = estate->maxAbilities;
402         int maxDims = estate->maxDims;
403         int pmax = maxDims;
404         if (estate->numSpecific) pmax -= 1;
405
406         if (sgroup == 0) {
407                 int cx = 0;
408                 for (int d1=0; d1 < pmax; ++d1) {
409                         double amt1 = piece * derivCoef[d1];
410 #pragma omp atomic
411                         derivOut[d1] += amt1;
412                         for (int d2=0; d2 <= d1; ++d2) {
413                                 int to = maxAbilities + cx;
414                                 double amt2 = piece * derivCoef[maxDims + cx];
415 #pragma omp atomic
416                                 derivOut[to] += amt2;
417                                 ++cx;
418                         }
419                 }
420         }
421
422         if (estate->numSpecific) {
423                 int sdim = pmax + sgroup;
424                 double amt3 = piece * derivCoef[pmax];
425 #pragma omp atomic
426                 derivOut[sdim] += amt3;
427
428                 double amt4 = piece * derivCoef[maxDims + triangleLoc0(pmax)];
429                 int to = maxAbilities + triangleLoc0(sdim);
430 #pragma omp atomic
431                 derivOut[to] += amt4;
432         }
433 }
434
435 static void gramProduct(double *vec, size_t len, double *out)
436 {
437         int cell = 0;
438         for (size_t v1=0; v1 < len; ++v1) {
439                 for (size_t v2=0; v2 <= v1; ++v2) {
440                         out[cell] = vec[v1] * vec[v2];
441                         ++cell;
442                 }
443         }
444 }
445
446 static double *reduceForSpecific(omxMatrix *mat, int maxDims, int sgroup)
447 {
448         double *out = Calloc(maxDims * maxDims, double);
449         for (int d1=0; d1 < maxDims-1; ++d1) {
450                 int cell = d1 * maxDims;
451                 for (int d2=0; d2 < maxDims-1; ++d2) {
452                         out[cell + d2] = omxMatrixElement(mat, d1, d2);
453                 }
454         }
455         int sloc = maxDims-1 + sgroup;
456         out[maxDims*maxDims - 1] = omxMatrixElement(mat, sloc, sloc);
457         return out;
458 }
459
460 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate,
461                           double *where, int sgroup, std::vector<double> *derivCoef)
462 {
463         omxMatrix *mean = estate->latentMeanOut;
464         omxMatrix *cov = estate->latentCovOut;
465         omxMatrix *icov = state->icov;
466         double *covData = cov->data;
467         double *icovData = icov->data;
468         int maxDims = estate->maxDims;
469         const char R='R';
470         const char L='L';
471         const char U='U';
472         const double alpha = 1;
473         const double beta = 0;
474         const int one = 1;
475
476         double *scov = NULL;
477         double *sicov = NULL;
478         if (estate->numSpecific) {
479                 scov = reduceForSpecific(cov, maxDims, sgroup);
480                 covData = scov;
481
482                 sicov = reduceForSpecific(icov, maxDims, sgroup);
483                 icovData = sicov;
484         }
485
486         std::vector<double> whereDiff(maxDims);
487         std::vector<double> whereGram(triangleLoc1(maxDims));
488         for (int d1=0; d1 < maxDims; ++d1) {
489                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
490         }
491         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
492
493         F77_CALL(dsymv)(&U, &maxDims, &alpha, icovData, &maxDims, whereDiff.data(), &one,
494                         &beta, derivCoef->data(), &one);
495
496         std::vector<double> covGrad1(maxDims * maxDims);
497         std::vector<double> covGrad2(maxDims * maxDims);
498
499         int cx=0;
500         for (int d1=0; d1 < maxDims; ++d1) {
501                 for (int d2=0; d2 <= d1; ++d2) {
502                         covGrad1[d2 * maxDims + d1] = covData[d2 * maxDims + d1] - whereGram[cx];
503                         ++cx;
504                 }
505         }
506
507         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, covGrad1.data(), &maxDims, icovData,
508                         &maxDims, &beta, covGrad2.data(), &maxDims);
509         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, icovData, &maxDims, covGrad2.data(),
510                         &maxDims, &beta, covGrad1.data(), &maxDims);
511
512         for (int d1=0; d1 < maxDims; ++d1) {
513                 covGrad1[d1 * maxDims + d1] /= 2.0;
514         }
515
516         cx = maxDims;
517         for (int d1=0; d1 < maxDims; ++d1) {
518                 int cell = d1 * maxDims;
519                 for (int d2=0; d2 <= d1; ++d2) {
520                         (*derivCoef)[cx] = -covGrad1[cell + d2];
521                         ++cx;
522                 }
523         }
524
525         Free(scov);
526         Free(sicov);
527 }
528
529 static bool latentDeriv(omxFitFunction *oo, double *gradient)
530 {
531         omxExpectation *expectation = oo->expectation;
532         BA81FitState *state = (BA81FitState*) oo->argStruct;
533         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
534         int numUnique = estate->numUnique;
535         int numSpecific = estate->numSpecific;
536         int maxDims = estate->maxDims;
537         int maxAbilities = estate->maxAbilities;
538         int primaryDims = maxDims;
539         omxMatrix *cov = estate->latentCovOut;
540         int *numIdentical = estate->numIdentical;
541         double *patternLik = estate->patternLik;
542
543         OMXZERO(patternLik, numUnique);
544         Free(estate->_logPatternLik);
545
546         omxCopyMatrix(state->icov, cov);
547
548         int info;
549         omxDPOTRF(state->icov, &info);
550         if (info != 0) {
551                 if (info < 0) error("dpotrf invalid argument %d", -info);
552                 return FALSE;
553         }
554         omxDPOTRI(state->icov, &info);
555         if (info != 0) {
556                 if (info < 0) error("dpotri invalid argument %d", -info);
557                 return FALSE;
558         }
559         // fill in rest from upper triangle
560         for (int rx=1; rx < maxAbilities; ++rx) {
561                 for (int cx=0; cx < rx; ++cx) {
562                         omxSetMatrixElement(state->icov, rx, cx, omxMatrixElement(state->icov, cx, rx));
563                 }
564         }
565
566         int maxDerivCoef = maxDims + triangleLoc1(maxDims);
567         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
568         double *uniqueDeriv = Calloc(numUnique * numLatents, double);
569
570         if (numSpecific == 0) {
571 #pragma omp parallel for num_threads(Global->numThreads)
572                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
573                         const int thrId = omx_absolute_thread_num();
574                         int quad[maxDims];
575                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
576                         double where[maxDims];
577                         pointToWhere(estate, quad, where, maxDims);
578                         std::vector<double> derivCoef(maxDerivCoef);
579                         calcDerivCoef(state, estate, where, 0, &derivCoef);
580                         double area = estate->priQarea[qx];
581                         double *lxk = ba81LikelihoodFast(expectation, thrId, 0, quad);
582
583                         for (int px=0; px < numUnique; px++) {
584                                 double tmp = (lxk[px] * area);
585 #pragma omp atomic
586                                 patternLik[px] += tmp;
587                                 mapLatentDeriv(state, estate, 0, tmp, derivCoef,
588                                                uniqueDeriv + px * numLatents);
589                         }
590                 }
591         } else {
592                 primaryDims -= 1;
593                 int sDim = primaryDims;
594                 long specificPoints = estate->quadGridSize;
595
596 #pragma omp parallel for num_threads(Global->numThreads)
597                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
598                         const int thrId = omx_absolute_thread_num();
599                         int quad[maxDims];
600                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
601
602                         cai2010(expectation, thrId, FALSE, quad);
603
604                         for (long sx=0; sx < specificPoints; sx++) {
605                                 quad[sDim] = sx;
606                                 double where[maxDims];
607                                 pointToWhere(estate, quad, where, maxDims);
608                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
609                                         std::vector<double> derivCoef(maxDerivCoef);
610                                         calcDerivCoef(state, estate, where, sgroup, &derivCoef);
611                                         double area = areaProduct(estate, quad, sgroup);
612                                         double *lxk = ba81LikelihoodFast(expectation, thrId, sgroup, quad);
613                                         for (int px=0; px < numUnique; px++) {
614                                                 double Ei = estate->allElxk[eIndex(estate, thrId, px)];
615                                                 double Eis = estate->Eslxk[esIndex(estate, thrId, sgroup, px)];
616                                                 double tmp = ((Ei / Eis) * lxk[px] * area);
617                                                 mapLatentDeriv(state, estate, sgroup, tmp, derivCoef,
618                                                                uniqueDeriv + px * numLatents);
619                                         }
620                                 }
621                         }
622
623                         double priArea = estate->priQarea[qx];
624                         for (int px=0; px < numUnique; px++) {
625                                 double Ei = estate->allElxk[eIndex(estate, thrId, px)];
626                                 double tmp = (Ei * priArea);
627 #pragma omp atomic
628                                 patternLik[px] += tmp;
629                         }
630                 }
631         }
632
633         /*
634         std::vector<double> hess1(triangleLoc1(numLatents));
635         std::vector<double> hessSum(triangleLoc1(numLatents));
636
637         // could run nicely in parallel with numUnique * triangleLoc(numLatents) buffer
638         for (int px=0; px < numUnique; ++px) {
639                 gramProduct(uniqueDeriv + px * numLatents, numLatents, hess1.data());
640                 double dups = numIdentical[px];
641                 for (int rx=0; rx < triangleLoc1(numLatents); ++rx) {
642                         hessSum[rx] += hess1[rx] * dups;
643                 }
644         }
645         */
646
647 #pragma omp parallel for num_threads(Global->numThreads)
648         for (int px=0; px < numUnique; ++px) {
649                 double weight = numIdentical[px] / patternLik[px];
650                 for (int rx=0; rx < numLatents; ++rx) {
651                         uniqueDeriv[px * numLatents + rx] *= weight;
652                 }
653         }
654
655 #pragma omp parallel for num_threads(Global->numThreads)
656         for (int rx=0; rx < numLatents; ++rx) {
657                 for (int px=1; px < numUnique; ++px) {
658                         uniqueDeriv[rx] += uniqueDeriv[px * numLatents + rx];
659                 }
660         }
661
662         for (int l1=0; l1 < numLatents; ++l1) {
663                 int t1 = state->latentMap[l1];
664                 if (t1 < 0) continue;
665                 gradient[t1] -= 2 * uniqueDeriv[l1];
666
667                 /*
668                 for (int l2=0; l2 <= l1; ++l2) {
669                         int t2 = state->latentMap[l2];
670                         if (t2 < 0) continue;
671                         hessian[numLatents * t1 + t2] -= 2 * hessSum[triangleLoc1(l1) + l2];
672                 }
673                 */
674         }
675
676         Free(uniqueDeriv);
677
678         return TRUE;
679 }
680
681 static void recomputePatternLik(omxFitFunction *oo)
682 {
683         omxExpectation *expectation = oo->expectation;
684         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
685         int numUnique = estate->numUnique;
686         int numSpecific = estate->numSpecific;
687         int maxDims = estate->maxDims;
688         int primaryDims = maxDims;
689         double *patternLik = estate->patternLik;
690
691         if (!patternLik) {
692                 ba81Estep1(oo->expectation);
693                 return;
694         }
695
696         OMXZERO(patternLik, numUnique);
697         Free(estate->_logPatternLik);
698
699         if (numSpecific == 0) {
700 #pragma omp parallel for num_threads(Global->numThreads)
701                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
702                         const int thrId = omx_absolute_thread_num();
703                         int quad[maxDims];
704                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
705                         double where[maxDims];
706                         pointToWhere(estate, quad, where, maxDims);
707                         double area = estate->priQarea[qx];
708                         double *lxk = ba81LikelihoodFast(expectation, thrId, 0, quad);
709
710                         for (int px=0; px < numUnique; px++) {
711                                 double tmp = (lxk[px] * area);
712 #pragma omp atomic
713                                 patternLik[px] += tmp;
714                         }
715                 }
716         } else {
717                 primaryDims -= 1;
718
719 #pragma omp parallel for num_threads(Global->numThreads)
720                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
721                         const int thrId = omx_absolute_thread_num();
722                         int quad[maxDims];
723                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
724
725                         cai2010(expectation, thrId, FALSE, quad);
726
727                         double priArea = estate->priQarea[qx];
728                         for (int px=0; px < numUnique; px++) {
729                                 double Ei = estate->allElxk[eIndex(estate, thrId, px)];
730                                 double tmp = (Ei * priArea);
731 #pragma omp atomic
732                                 patternLik[px] += tmp;
733                         }
734                 }
735         }
736 }
737
738 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
739 {
740         BA81FitState *state = (BA81FitState*) oo->argStruct;
741         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
742
743         std::vector<int> &latentMap = state->latentMap;
744         if (!latentMap.size()) buildLatentParamMap(oo, fc->varGroup);
745
746         double *ElatentMean = estate->ElatentMean;
747         double *ElatentCov = estate->ElatentCov;
748         int maxAbilities = estate->maxAbilities;
749
750         for (int a1 = 0; a1 < maxAbilities; ++a1) {
751                 if (latentMap[a1] >= 0) {
752                         int to = latentMap[a1];
753                         fc->est[to] = ElatentMean[a1];
754                 }
755
756                 for (int a2 = 0; a2 <= a1; ++a2) {
757                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
758                         if (to < 0) continue;
759                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
760                 }
761         }
762         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
763 }
764
765 static double
766 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
767 {
768         BA81FitState *state = (BA81FitState*) oo->argStruct;
769         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
770
771         ++state->fitCount;
772
773         if (estate->type == EXPECTATION_AUGMENTED) {
774                 if (!state->paramMap) buildItemParamMap(oo, fc->varGroup);
775
776                 if (want & FF_COMPUTE_PREOPTIMIZE) {
777                         if (!state->paramMap) buildItemParamMap(oo, fc->varGroup);
778                         schilling_bock_2005_rescale(oo, fc); // how does this work in multigroup? TODO
779                         return 0;
780                 }
781
782                 if (want & FF_COMPUTE_POSTOPTIMIZE) {
783                         omxForceCompute(estate->EitemParam);
784                         ba81Estep1(oo->expectation);
785                         return 0;
786                 }
787
788                 if (want & FF_COMPUTE_GRADIENT) ++state->gradientCount;
789
790                 for (size_t nx=0; nx < state->NAtriangle.size(); ++nx) {
791                         fc->hess[ state->NAtriangle[nx] ] = nan("symmetric");
792                 }
793
794                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
795                 double got = ba81ComputeMFit1(oo, want, fc->grad, fc->hess);
796                 return got;
797         } else if (estate->type == EXPECTATION_OBSERVED) {
798                 if (state->latentMap.size() == 0) buildLatentParamMap(oo, fc->varGroup);
799
800                 omxExpectation *expectation = oo->expectation;
801
802                 if (want & FF_COMPUTE_PREOPTIMIZE) {
803                         setLatentStartingValues(oo, fc);
804                         return 0;
805                 }
806
807                 if (want & FF_COMPUTE_GRADIENT) {
808                         ba81SetupQuadrature(expectation, estate->targetQpoints, 0);
809                         ba81buildLXKcache(expectation);
810                         if (!latentDeriv(oo, fc->grad)) {
811                                 return INFINITY;
812                         }
813                 }
814
815                 if (want & FF_COMPUTE_HESSIAN) {
816                         warning("%s: Hessian is not available for latent distribution parameters", NAME);
817                 }
818
819                 if (want & FF_COMPUTE_FIT) {
820                         if (!(want & FF_COMPUTE_GRADIENT)) {
821                                 ba81SetupQuadrature(expectation, estate->targetQpoints, 0);
822                                 recomputePatternLik(oo);
823                         }
824                         double *logPatternLik = getLogPatternLik(expectation);
825                         int *numIdentical = estate->numIdentical;
826                         int numUnique = estate->numUnique;
827                         double got = 0;
828                         for (int ux=0; ux < numUnique; ux++) {
829                                 got += numIdentical[ux] * logPatternLik[ux];
830                         }
831                         //mxLog("fit %.2f", -2 * got);
832                         return -2 * got;
833                 }
834
835                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
836
837                 return 0;
838         } else {
839                 error("Confused");
840         }
841 }
842
843 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
844 {
845         if (!want) return;
846         double got = ba81ComputeFit(oo, want, fc);
847         if (got) oo->matrix->data[0] = got;
848 }
849
850 BA81FitState::~BA81FitState()
851 {
852         Free(paramMap);
853         Free(tmpLatentMean);
854         Free(tmpLatentCov);
855         omxFreeAllMatrixData(icov);
856 }
857
858 static void ba81Destroy(omxFitFunction *oo) {
859         BA81FitState *state = (BA81FitState *) oo->argStruct;
860         delete state;
861 }
862
863 void omxInitFitFunctionBA81(omxFitFunction* oo)
864 {
865         if (!oo->argStruct) { // ugh!
866                 BA81FitState *state = new BA81FitState;
867                 oo->argStruct = state;
868         }
869
870         BA81FitState *state = (BA81FitState*) oo->argStruct;
871
872         omxExpectation *expectation = oo->expectation;
873         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
874
875         //newObj->data = oo->expectation->data;
876
877         oo->computeFun = ba81Compute;
878         oo->setVarGroup = ba81SetFreeVarGroup;
879         oo->destructFun = ba81Destroy;
880         oo->gradientAvailable = TRUE;
881         oo->hessianAvailable = TRUE;
882
883         int maxParam = estate->itemParam->rows;
884         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
885
886         int maxAbilities = estate->maxAbilities;
887
888         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
889         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
890
891         int numItems = estate->itemParam->cols;
892         for (int ix=0; ix < numItems; ix++) {
893                 const double *spec = estate->itemSpec[ix];
894                 int id = spec[RPF_ISpecID];
895                 if (id < 0 || id >= rpf_numModels) {
896                         error("ItemSpec %d has unknown item model %d", ix, id);
897                 }
898         }
899
900         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
901 }