Auto-tune Newton-Raphson
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 struct BA81FitState {
24
25         bool haveLatentMap;
26         std::vector<int> latentMap;
27
28         bool haveItemMap;
29         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
30         std::vector<int> paramFlavor;        // freeParam
31         std::vector<int> paramMap;           // itemParam->cols * itemDerivPadSize -> index of free parameter
32         std::vector<int> paramLocations;     // param# -> count of appearances in ItemParam
33         std::vector<int> itemParamFree;      // itemParam->cols * itemParam->rows
34         std::vector<int> ihessDivisor;       // freeParam * freeParam
35
36         omxMatrix *cholCov;
37         int choleskyError;
38         double *tmpLatentMean;    // maxDims
39         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
40         omxMatrix *icov;          // inverse covariance matrix
41
42         std::vector< FreeVarGroup* > varGroups;
43         size_t numItemParam;
44
45         BA81FitState();
46         ~BA81FitState();
47 };
48
49 BA81FitState::BA81FitState()
50 {
51         tmpLatentMean = NULL;
52         tmpLatentCov = NULL;
53         haveItemMap = false;
54         haveLatentMap = false;
55 }
56
57 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
58 {
59         FreeVarGroup *fvg = fc->varGroup;
60         BA81FitState *state = (BA81FitState *) oo->argStruct;
61         std::vector<int> &latentMap = state->latentMap;
62         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
63         int meanNum = estate->latentMeanOut->matrixNumber;
64         int covNum = estate->latentCovOut->matrixNumber;
65         int itemNum = estate->itemParam->matrixNumber;
66         int maxAbilities = estate->maxAbilities;
67         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
68
69         latentMap.assign(numLatents, -1);
70
71         int numParam = int(fvg->vars.size());
72         for (int px=0; px < numParam; px++) {
73                 omxFreeVar *fv = fvg->vars[px];
74                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
75                         omxFreeVarLocation *loc = &fv->locations[lx];
76                         int matNum = ~loc->matrix;
77                         if (matNum == meanNum) {
78                                 latentMap[loc->row + loc->col] = px;
79                         } else if (matNum == covNum) {
80                                 int a1 = loc->row;
81                                 int a2 = loc->col;
82                                 if (a1 < a2) std::swap(a1, a2);
83                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
84                                 if (latentMap[cell] == -1) {
85                                         latentMap[cell] = px;
86
87                                         if (a1 == a2 && fv->lbound == NEG_INF) {
88                                                 fv->lbound = 1e-6;  // variance must be positive
89                                                 if (fc->est[px] < fv->lbound) {
90                                                         error("Starting value for variance %s is negative", fv->name);
91                                                 }
92                                         }
93                                 } else if (latentMap[cell] != px) {
94                                         // doesn't work for multigroup constraints TODO
95                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
96                                               fvg->vars[latentMap[cell]]->name, fv->name);
97                                 }
98                         } else if (matNum == itemNum) {
99                                 omxRaiseErrorf(globalState, "The fitfunction free.set should consist of "
100                                                "latent distribution parameters, excluding item parameters");
101                         }
102                 }
103         }
104         state->haveLatentMap = TRUE;
105 }
106
107 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
108 {
109         FreeVarGroup *fvg = fc->varGroup;
110         BA81FitState *state = (BA81FitState *) oo->argStruct;
111         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
112         omxMatrix *itemParam = estate->itemParam;
113         int size = itemParam->cols * state->itemDerivPadSize;
114         state->paramMap.assign(size, -1);  // matrix location to free param index
115         state->itemParamFree.assign(itemParam->rows * itemParam->cols, FALSE);
116
117         size_t numFreeParams = state->numItemParam = fvg->vars.size();
118         state->paramLocations.assign(numFreeParams, 0);
119         state->paramFlavor.assign(numFreeParams, -1);
120
121         for (size_t px=0; px < numFreeParams; px++) {
122                 omxFreeVar *fv = fvg->vars[px];
123                 state->paramLocations[px] = int(fv->locations.size());
124                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
125                         omxFreeVarLocation *loc = &fv->locations[lx];
126                         int matNum = ~loc->matrix;
127                         // prohibit mean & cov TODO
128                         if (matNum == itemParam->matrixNumber) {
129                                 int at = loc->col * state->itemDerivPadSize + loc->row;
130                                 state->paramMap[at] = px;
131                                 state->itemParamFree[loc->col * itemParam->rows + loc->row] = TRUE;
132
133                                 const double *spec = estate->itemSpec[loc->col];
134                                 int id = spec[RPF_ISpecID];
135                                 int flavor;
136                                 double upper, lower;
137                                 (*rpf_model[id].paramInfo)(spec, loc->row, &flavor, &upper, &lower);
138                                 if (state->paramFlavor[px] < 0) {
139                                         state->paramFlavor[px] = flavor;
140                                 } else if (state->paramFlavor[px] != flavor) {
141                                         error("Cannot equate %s with %s[%d,%d]", fv->name,
142                                               itemParam->name, loc->row, loc->col);
143                                 }
144                                 if (fv->lbound == NEG_INF && isfinite(lower)) {
145                                         fv->lbound = lower;
146                                         if (fc->est[px] < fv->lbound) {
147                                                 error("Starting value %s %f less than lower bound %f",
148                                                       fv->name, fc->est[px], lower);
149                                         }
150                                 }
151                                 if (fv->ubound == INF && isfinite(upper)) {
152                                         fv->ubound = upper;
153                                         if (fc->est[px] > fv->ubound) {
154                                                 error("Starting value %s %f greater than upper bound %f",
155                                                       fv->name, fc->est[px], upper);
156                                         }
157                                 }
158                         }
159                 }
160         }
161
162         state->ihessDivisor.resize(size);
163
164         for (int cx=0; cx < itemParam->cols; ++cx) {
165                 const double *spec = estate->itemSpec[cx];
166                 int id = spec[RPF_ISpecID];
167                 int numParam = (*rpf_model[id].numParam)(spec);
168
169                 for (int p1=0; p1 < numParam; p1++) {
170                         int at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
171                         if (at1 < 0) continue;
172
173                         for (int p2=0; p2 <= p1; p2++) {
174                                 int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
175                                 if (at2 < 0) continue;
176
177                                 if (at1 < at2) std::swap(at1, at2);  // lower triangle
178
179                                 //mxLog("Item %d param(%d,%d) -> H[%d,%d]", cx, p1, p2, at1, at2);
180                                 int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
181                                 state->paramMap[at] = numFreeParams + at1 * numFreeParams + at2;
182
183                                 state->ihessDivisor[at] =
184                                         state->paramLocations[at1] * state->paramLocations[at2];
185                         }
186                 }
187         }
188
189         state->haveItemMap = TRUE;
190         //pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
191 }
192
193 OMXINLINE static double
194 ba81Fit1Ordinate(omxFitFunction* oo, const long qx, const int *quad,
195                  const double *weight, int want, double *myDeriv)
196 {
197         BA81FitState *state = (BA81FitState*) oo->argStruct;
198         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
199         omxMatrix *itemParam = estate->itemParam;
200         int numItems = itemParam->cols;
201         int maxDims = estate->maxDims;
202         int do_fit = want & FF_COMPUTE_FIT;
203         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN);
204
205         double where[maxDims];
206         pointToWhere(estate, quad, where, maxDims);
207
208         double *outcomeProb = NULL;
209         if (do_fit) {
210                 outcomeProb = Realloc(NULL, estate->totalOutcomes, double); // avoid malloc/free? TODO
211                 computeRPF(estate, itemParam, quad, TRUE, outcomeProb);
212         }
213
214         double thr_ll = 0;
215         const double *oProb = outcomeProb;
216         for (int ix=0; ix < numItems; ix++) {
217                 const double *spec = estate->itemSpec[ix];
218                 int id = spec[RPF_ISpecID];
219                 int iOutcomes = estate->itemOutcomes[ix];
220
221                 if (do_fit) {
222                         for (int ox=0; ox < iOutcomes; ox++) {
223                                 double got = weight[ox] * oProb[ox];
224                                 thr_ll += got;
225                         }
226                 }
227
228                 if (do_deriv) {
229                         double *iparam = omxMatrixColumn(itemParam, ix);
230                         double *pad = myDeriv + ix * state->itemDerivPadSize;
231                         (*rpf_model[id].dLL1)(spec, iparam, where, weight, pad);
232                 }
233                 oProb += iOutcomes;
234                 weight += iOutcomes;
235         }
236
237         Free(outcomeProb);
238
239         return thr_ll;
240 }
241
242 static double
243 ba81ComputeMFit1(omxFitFunction* oo, int want, FitContext *fc)
244 {
245         BA81FitState *state = (BA81FitState*) oo->argStruct;
246         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
247         omxMatrix *customPrior = estate->customPrior;
248         omxMatrix *itemParam = estate->itemParam;
249         std::vector<const double*> &itemSpec = estate->itemSpec;
250         int maxDims = estate->maxDims;
251         const int totalOutcomes = estate->totalOutcomes;
252
253         if (estate->verbose) mxLog("%s: em.fit(%d)", oo->matrix->name, want);
254
255         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
256
257         double ll = 0;
258         if (customPrior) {
259                 omxRecompute(customPrior);
260                 ll = customPrior->data[0];
261                 // need deriv adjustment TODO
262         }
263
264         if (!isfinite(ll)) {
265                 omxPrint(itemParam, "item param");
266                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
267         }
268
269 #pragma omp parallel for num_threads(Global->numThreads) schedule(static,4)
270         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
271                 int quad[maxDims];
272                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
273                 double *weight = estate->expected + qx * totalOutcomes;
274                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
275                 double thr_ll = ba81Fit1Ordinate(oo, qx, quad, weight, want, myDeriv);
276                 
277 #pragma omp atomic
278                 ll += thr_ll;
279         }
280
281         int excluded = 0;
282         int numItems = itemParam->cols;
283
284         if (want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN)) {
285                 double *deriv0 = thrDeriv;
286
287                 int perThread = itemParam->cols * state->itemDerivPadSize;
288                 for (int th=1; th < Global->numThreads; th++) {
289                         double *thrD = thrDeriv + th * perThread;
290                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
291                 }
292
293                 for (int ix=0; ix < numItems; ix++) {
294                         const double *spec = itemSpec[ix];
295                         int id = spec[RPF_ISpecID];
296                         double *iparam = omxMatrixColumn(itemParam, ix);
297                         double *pad = deriv0 + ix * state->itemDerivPadSize;
298                         (*rpf_model[id].dLL2)(spec, iparam, pad);
299                 }
300
301                 int numFreeParams = int(state->numItemParam);
302                 int numParams = itemParam->cols * state->itemDerivPadSize;
303                 for (int ox=0; ox < numParams; ox++) {
304                         int to = state->paramMap[ox];
305                         if (to == -1) continue;
306
307                         // Need to check because this can happen if
308                         // lbounds/ubounds are not set appropriately.
309                         if (0 && !isfinite(deriv0[ox])) {
310                                 int item = ox / itemParam->rows;
311                                 mxLog("item parameters:\n");
312                                 const double *spec = itemSpec[item];
313                                 int id = spec[RPF_ISpecID];
314                                 int numParam = (*rpf_model[id].numParam)(spec);
315                                 double *iparam = omxMatrixColumn(itemParam, item);
316                                 pda(iparam, numParam, 1);
317                                 // Perhaps bounds can be pulled in from librpf? TODO
318                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
319                                       ox, item, deriv0[ox]);
320                         }
321
322                         if (to < numFreeParams) {
323                                 if (want & FF_COMPUTE_GRADIENT) {
324                                         fc->grad[to] += deriv0[ox];
325                                 }
326                         } else {
327                                 if (want & FF_COMPUTE_HESSIAN) {
328                                         int Hto = to - numFreeParams;
329                                         fc->hess[Hto] += deriv0[ox];
330                                 }
331                         }
332                 }
333
334                 if (want & FF_COMPUTE_IHESSIAN) {
335                         for (int ix=0; ix < numItems; ix++) {
336                                 const double *spec = itemSpec[ix];
337                                 int id = spec[RPF_ISpecID];
338                                 int iParams = (*rpf_model[id].numParam)(spec);
339                                 double *pad = deriv0 + ix * state->itemDerivPadSize + iParams;
340                                 int *mask = state->itemParamFree.data() + ix * itemParam->rows;
341                                 double stress;
342                                 omxApproxInvertPackedPosDefTriangular(iParams, mask, pad, &stress);
343                                 if (stress) ++excluded;
344                         }
345
346                         for (int ox=0; ox < numParams; ox++) {
347                                 int to = state->paramMap[ox];
348                                 if (to == -1) continue;
349                                 if (to >= numFreeParams) {
350                                         int Hto = to - numFreeParams;
351                                         fc->ihess[Hto] += deriv0[ox] / state->ihessDivisor[ox];
352                                 }
353                         }
354
355                 }
356         }
357
358         if (excluded) {
359                 // maybe not fatal, but investigation needed
360                 omxRaiseErrorf(globalState, "Hessian not positive definite for %d/%d items",
361                                excluded, numItems);
362         }
363
364         Free(thrDeriv);
365
366         return -ll;
367 }
368
369 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
370 {}
371
372 // can use same reorganization to avoid the gram product of where for every pattern TODO
373 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
374                            const std::vector<double> &derivCoef,
375                            double *derivOut)
376 {
377         int maxAbilities = estate->maxAbilities;
378         int maxDims = estate->maxDims;
379         int pmax = maxDims;
380         if (estate->numSpecific) pmax -= 1;
381
382         if (sgroup == 0) {
383                 int cx = 0;
384                 for (int d1=0; d1 < pmax; ++d1) {
385                         double amt1 = piece * derivCoef[d1];
386 #pragma omp atomic
387                         derivOut[d1] += amt1;
388                         for (int d2=0; d2 <= d1; ++d2) {
389                                 int to = maxAbilities + cx;
390                                 double amt2 = piece * derivCoef[maxDims + cx];
391 #pragma omp atomic
392                                 derivOut[to] += amt2;
393                                 ++cx;
394                         }
395                 }
396         }
397
398         if (estate->numSpecific) {
399                 int sdim = pmax + sgroup;
400                 double amt3 = piece * derivCoef[pmax];
401 #pragma omp atomic
402                 derivOut[sdim] += amt3;
403
404                 double amt4 = piece * derivCoef[maxDims + triangleLoc0(pmax)];
405                 int to = maxAbilities + triangleLoc0(sdim);
406 #pragma omp atomic
407                 derivOut[to] += amt4;
408         }
409 }
410
411 static double *reduceForSpecific(omxMatrix *mat, int maxDims, int sgroup)
412 {
413         double *out = Calloc(maxDims * maxDims, double);
414         for (int d1=0; d1 < maxDims-1; ++d1) {
415                 int cell = d1 * maxDims;
416                 for (int d2=0; d2 < maxDims-1; ++d2) {
417                         out[cell + d2] = omxMatrixElement(mat, d1, d2);
418                 }
419         }
420         int sloc = maxDims-1 + sgroup;
421         out[maxDims*maxDims - 1] = omxMatrixElement(mat, sloc, sloc);
422         return out;
423 }
424
425 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate,
426                           double *where, int sgroup, std::vector<double> *derivCoef)
427 {
428         omxMatrix *mean = estate->latentMeanOut;
429         omxMatrix *cov = estate->latentCovOut;
430         omxMatrix *icov = state->icov;
431         double *covData = cov->data;
432         double *icovData = icov->data;
433         int maxDims = estate->maxDims;
434         const char R='R';
435         const char L='L';
436         const char U='U';
437         const double alpha = 1;
438         const double beta = 0;
439         const int one = 1;
440
441         double *scov = NULL;
442         double *sicov = NULL;
443         if (estate->numSpecific) {
444                 scov = reduceForSpecific(cov, maxDims, sgroup);
445                 covData = scov;
446
447                 sicov = reduceForSpecific(icov, maxDims, sgroup);
448                 icovData = sicov;
449         }
450
451         std::vector<double> whereDiff(maxDims);
452         std::vector<double> whereGram(triangleLoc1(maxDims));
453         for (int d1=0; d1 < maxDims; ++d1) {
454                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
455         }
456         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
457
458         F77_CALL(dsymv)(&U, &maxDims, &alpha, icovData, &maxDims, whereDiff.data(), &one,
459                         &beta, derivCoef->data(), &one);
460
461         std::vector<double> covGrad1(maxDims * maxDims);
462         std::vector<double> covGrad2(maxDims * maxDims);
463
464         int cx=0;
465         for (int d1=0; d1 < maxDims; ++d1) {
466                 for (int d2=0; d2 <= d1; ++d2) {
467                         covGrad1[d2 * maxDims + d1] = covData[d2 * maxDims + d1] - whereGram[cx];
468                         ++cx;
469                 }
470         }
471
472         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, covGrad1.data(), &maxDims, icovData,
473                         &maxDims, &beta, covGrad2.data(), &maxDims);
474         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, icovData, &maxDims, covGrad2.data(),
475                         &maxDims, &beta, covGrad1.data(), &maxDims);
476
477         for (int d1=0; d1 < maxDims; ++d1) {
478                 covGrad1[d1 * maxDims + d1] /= 2.0;
479         }
480
481         cx = maxDims;
482         for (int d1=0; d1 < maxDims; ++d1) {
483                 int cell = d1 * maxDims;
484                 for (int d2=0; d2 <= d1; ++d2) {
485                         (*derivCoef)[cx] = -covGrad1[cell + d2];
486                         ++cx;
487                 }
488         }
489
490         Free(scov);
491         Free(sicov);
492 }
493
494 static bool latentDeriv(omxFitFunction *oo, double *gradient)
495 {
496         omxExpectation *expectation = oo->expectation;
497         BA81FitState *state = (BA81FitState*) oo->argStruct;
498         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
499         if (estate->verbose) mxLog("%s: latentDeriv", oo->matrix->name);
500
501         int numUnique = estate->numUnique;
502         int numSpecific = estate->numSpecific;
503         int maxDims = estate->maxDims;
504         int maxAbilities = estate->maxAbilities;
505         int primaryDims = maxDims;
506         omxMatrix *cov = estate->latentCovOut;
507         int *numIdentical = estate->numIdentical;
508         double *patternLik = estate->patternLik;
509
510         OMXZERO(patternLik, numUnique);
511
512         omxCopyMatrix(state->icov, cov);
513
514         int info;
515         omxDPOTRF(state->icov, &info);
516         if (info != 0) {
517                 if (info < 0) error("dpotrf invalid argument %d", -info);
518                 return FALSE;
519         }
520         omxDPOTRI(state->icov, &info);
521         if (info != 0) {
522                 if (info < 0) error("dpotri invalid argument %d", -info);
523                 return FALSE;
524         }
525         // fill in rest from upper triangle
526         for (int rx=1; rx < maxAbilities; ++rx) {
527                 for (int cx=0; cx < rx; ++cx) {
528                         omxSetMatrixElement(state->icov, rx, cx, omxMatrixElement(state->icov, cx, rx));
529                 }
530         }
531
532         int maxDerivCoef = maxDims + triangleLoc1(maxDims);
533         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
534         double *uniqueDeriv = Calloc(numUnique * numLatents, double);
535
536         if (numSpecific == 0) {
537 #pragma omp parallel for num_threads(Global->numThreads)
538                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
539                         const int thrId = omx_absolute_thread_num();
540                         int quad[maxDims];
541                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
542                         double where[maxDims];
543                         pointToWhere(estate, quad, where, maxDims);
544                         std::vector<double> derivCoef(maxDerivCoef);
545                         calcDerivCoef(state, estate, where, 0, &derivCoef);
546                         double area = estate->priQarea[qx];
547                         double *lxk = ba81LikelihoodFast1(expectation, thrId, 0, qx);
548
549                         for (int px=0; px < numUnique; px++) {
550                                 double tmp = (lxk[px] * area);
551 #pragma omp atomic
552                                 patternLik[px] += tmp;
553                                 mapLatentDeriv(state, estate, 0, tmp, derivCoef,
554                                                uniqueDeriv + px * numLatents);
555                         }
556                 }
557         } else {
558                 primaryDims -= 1;
559                 int sDim = primaryDims;
560                 long specificPoints = estate->quadGridSize;
561
562 #pragma omp parallel for num_threads(Global->numThreads)
563                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
564                         const int thrId = omx_absolute_thread_num();
565                         int quad[maxDims];
566                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
567
568                         cai2010(expectation, thrId, FALSE, qx);
569                         double *allElxk = eBase(estate, thrId);
570                         double *Eslxk = esBase(estate, thrId);
571
572                         for (long sx=0; sx < specificPoints; sx++) {
573                                 long qloc = qx * specificPoints + sx;
574                                 quad[sDim] = sx;
575                                 double where[maxDims];
576                                 pointToWhere(estate, quad, where, maxDims);
577                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
578                                         std::vector<double> derivCoef(maxDerivCoef);
579                                         calcDerivCoef(state, estate, where, sgroup, &derivCoef);
580                                         double area = areaProduct(estate, qx, sx, sgroup);
581                                         double *lxk = ba81LikelihoodFast1(expectation, thrId, sgroup, qloc);
582                                         for (int px=0; px < numUnique; px++) {
583                                                 double Ei = allElxk[px];
584                                                 double Eis = Eslxk[sgroup * numUnique + px];
585                                                 double tmp = ((Ei / Eis) * lxk[px] * area);
586                                                 mapLatentDeriv(state, estate, sgroup, tmp, derivCoef,
587                                                                uniqueDeriv + px * numLatents);
588                                         }
589                                 }
590                         }
591
592                         double priArea = estate->priQarea[qx];
593                         for (int px=0; px < numUnique; px++) {
594                                 double Ei = allElxk[px];
595                                 double tmp = (Ei * priArea);
596 #pragma omp atomic
597                                 patternLik[px] += tmp;
598                         }
599                 }
600         }
601
602         /*
603         std::vector<double> hess1(triangleLoc1(numLatents));
604         std::vector<double> hessSum(triangleLoc1(numLatents));
605
606         // could run nicely in parallel with numUnique * triangleLoc(numLatents) buffer
607         for (int px=0; px < numUnique; ++px) {
608                 gramProduct(uniqueDeriv + px * numLatents, numLatents, hess1.data());
609                 double dups = numIdentical[px];
610                 for (int rx=0; rx < triangleLoc1(numLatents); ++rx) {
611                         hessSum[rx] += hess1[rx] * dups;
612                 }
613         }
614         */
615
616 #pragma omp parallel for num_threads(Global->numThreads)
617         for (int px=0; px < numUnique; ++px) {
618                 double weight = numIdentical[px] / patternLik[px];
619                 for (int rx=0; rx < numLatents; ++rx) {
620                         uniqueDeriv[px * numLatents + rx] *= weight;
621                 }
622         }
623
624 #pragma omp parallel for num_threads(Global->numThreads)
625         for (int rx=0; rx < numLatents; ++rx) {
626                 for (int px=1; px < numUnique; ++px) {
627                         uniqueDeriv[rx] += uniqueDeriv[px * numLatents + rx];
628                 }
629         }
630
631         for (int l1=0; l1 < numLatents; ++l1) {
632                 int t1 = state->latentMap[l1];
633                 if (t1 < 0) continue;
634                 gradient[t1] -= 2 * uniqueDeriv[l1];
635
636                 /*
637                 for (int l2=0; l2 <= l1; ++l2) {
638                         int t2 = state->latentMap[l2];
639                         if (t2 < 0) continue;
640                         hessian[numLatents * t1 + t2] -= 2 * hessSum[triangleLoc1(l1) + l2];
641                 }
642                 */
643         }
644
645         Free(uniqueDeriv);
646
647         return TRUE;
648 }
649
650 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
651 {
652         BA81FitState *state = (BA81FitState*) oo->argStruct;
653         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
654         std::vector<int> &latentMap = state->latentMap;
655         std::vector<double> &ElatentMean = estate->ElatentMean;
656         std::vector<double> &ElatentCov = estate->ElatentCov;
657         int maxAbilities = estate->maxAbilities;
658
659         for (int a1 = 0; a1 < maxAbilities; ++a1) {
660                 if (latentMap[a1] >= 0) {
661                         int to = latentMap[a1];
662                         fc->est[to] = ElatentMean[a1];
663                 }
664
665                 for (int a2 = 0; a2 <= a1; ++a2) {
666                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
667                         if (to < 0) continue;
668                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
669                 }
670         }
671
672         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
673 }
674
675 static double
676 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
677 {
678         BA81FitState *state = (BA81FitState*) oo->argStruct;
679         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
680
681         if (want & FF_COMPUTE_POSTOPTIMIZE) return 0;
682
683         if (estate->type == EXPECTATION_AUGMENTED) {
684                 if (!state->haveItemMap) buildItemParamMap(oo, fc);
685
686                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
687
688                 if (want & FF_COMPUTE_PARAMFLAVOR) {
689                         for (int px=0; px < state->numItemParam; ++px) {
690                                 if (state->paramFlavor[px] < 0) continue;
691                                 fc->flavor[px] = state->paramFlavor[px];
692                         }
693                 }
694
695                 if (want & FF_COMPUTE_PREOPTIMIZE) {
696                         // schilling_bock_2005_rescale(oo, fc); seems counterproductive
697                         return 0;
698                 }
699
700                 double got = ba81ComputeMFit1(oo, want, fc);
701                 return got;
702         } else if (estate->type == EXPECTATION_OBSERVED) {
703                 if (!state->haveLatentMap) buildLatentParamMap(oo, fc);
704
705                 if (want & FF_COMPUTE_PREOPTIMIZE) {
706                         setLatentStartingValues(oo, fc);
707                         return 0;
708                 }
709
710                 omxExpectationCompute(oo->expectation, NULL);
711
712                 if (want & FF_COMPUTE_GRADIENT) {
713                         if (!latentDeriv(oo, fc->grad)) {
714                                 return INFINITY;
715                         }
716                 }
717
718                 if (want & FF_COMPUTE_HESSIAN) {
719                         warning("%s: Hessian is not available for latent distribution parameters", oo->matrix->name);
720                 }
721
722                 if (want & FF_COMPUTE_FIT) {
723                         double *patternLik = estate->patternLik;
724                         int *numIdentical = estate->numIdentical;
725                         int numUnique = estate->numUnique;
726                         estate->excludedPatterns = 0;
727                         double got = 0;
728 #pragma omp parallel for num_threads(Global->numThreads) schedule(static,64) reduction(+:got)
729                         for (int ux=0; ux < numUnique; ux++) {
730                                 if (!validPatternLik(patternLik[ux])) {
731 #pragma omp atomic
732                                         ++estate->excludedPatterns;
733                                         // somehow indicate that this -2LL is provisional TODO
734                                         continue;
735                                 }
736                                 got += numIdentical[ux] * log(patternLik[ux]);
737                         }
738                         if (estate->verbose) mxLog("%s: fit (%d/%d excluded)",
739                                                    oo->matrix->name, estate->excludedPatterns, numUnique);
740                         //mxLog("fit %.2f", -2 * got);
741                         return -2 * got;
742                 }
743
744                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
745
746                 return 0;
747         } else {
748                 error("Confused");
749         }
750 }
751
752 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
753 {
754         if (!want) return;
755         double got = ba81ComputeFit(oo, want, fc);
756         if (got) oo->matrix->data[0] = got;
757 }
758
759 BA81FitState::~BA81FitState()
760 {
761         Free(tmpLatentMean);
762         Free(tmpLatentCov);
763         omxFreeAllMatrixData(icov);
764         omxFreeAllMatrixData(cholCov);
765 }
766
767 static void ba81Destroy(omxFitFunction *oo) {
768         BA81FitState *state = (BA81FitState *) oo->argStruct;
769         delete state;
770 }
771
772 void omxInitFitFunctionBA81(omxFitFunction* oo)
773 {
774         if (!oo->argStruct) { // ugh!
775                 BA81FitState *state = new BA81FitState;
776                 oo->argStruct = state;
777         }
778
779         BA81FitState *state = (BA81FitState*) oo->argStruct;
780
781         omxExpectation *expectation = oo->expectation;
782         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
783
784         //newObj->data = oo->expectation->data;
785
786         oo->computeFun = ba81Compute;
787         oo->setVarGroup = ba81SetFreeVarGroup;
788         oo->destructFun = ba81Destroy;
789         oo->gradientAvailable = TRUE;
790         oo->hessianAvailable = TRUE;
791         oo->parametersHaveFlavor = TRUE;
792
793         int maxParam = estate->itemParam->rows;
794         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
795
796         int maxAbilities = estate->maxAbilities;
797
798         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
799         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
800
801         int numItems = estate->itemParam->cols;
802         for (int ix=0; ix < numItems; ix++) {
803                 const double *spec = estate->itemSpec[ix];
804                 int id = spec[RPF_ISpecID];
805                 if (id < 0 || id >= rpf_numModels) {
806                         error("ItemSpec %d has unknown item model %d", ix, id);
807                 }
808         }
809
810         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
811         state->cholCov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
812 }