Newton-Raphson doesn't need fit; don't compute it
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 static const char *NAME = "FitFunctionBA81";
24
25 struct BA81FitState {
26
27         omxMatrix *itemParam;     // M step version
28         std::vector<int> latentMap;
29         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
30         int *paramMap;            // itemParam->cols * itemDerivPadSize -> index of free parameter
31         std::vector<int> NAtriangle; // TODO remove
32         omxMatrix *customPrior;
33         int choleskyError;
34         double *tmpLatentMean;    // maxDims
35         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
36         omxMatrix *icov;          // inverse covariance matrix
37         int fitCount;             // dubious, remove? TODO
38         int gradientCount;        // dubious, remove? TODO
39
40         std::vector< FreeVarGroup* > varGroups;
41         size_t numItemParam;
42
43         BA81FitState();
44         ~BA81FitState();
45 };
46
47 BA81FitState::BA81FitState()
48 {
49         itemParam = NULL;
50         paramMap = NULL;
51         customPrior = NULL;
52         fitCount = 0;
53         gradientCount = 0;
54         tmpLatentMean = NULL;
55         tmpLatentCov = NULL;
56 }
57
58 static void buildLatentParamMap(omxFitFunction* oo, FreeVarGroup *fvg)
59 {
60         // if no latent param, need a flag to determine whether to initialize TODO
61
62         BA81FitState *state = (BA81FitState *) oo->argStruct;
63         std::vector<int> &latentMap = state->latentMap;
64         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
65         int meanNum = estate->latentMeanOut->matrixNumber;
66         int covNum = estate->latentCovOut->matrixNumber;
67         int maxAbilities = estate->maxAbilities;
68         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
69
70         latentMap.assign(numLatents, -1);
71
72         int numParam = int(fvg->vars.size());
73         for (int px=0; px < numParam; px++) {
74                 omxFreeVar *fv = fvg->vars[px];
75                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
76                         omxFreeVarLocation *loc = &fv->locations[lx];
77                         int matNum = ~loc->matrix;
78                         if (matNum == meanNum) {
79                                 latentMap[loc->row + loc->col] = px;
80                         } else if (matNum == covNum) {
81                                 int a1 = loc->row;
82                                 int a2 = loc->col;
83                                 if (a1 < a2) std::swap(a1, a2);
84                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
85                                 if (latentMap[cell] == -1)
86                                         latentMap[cell] = px;
87                                 else if (latentMap[cell] != px) {
88                                         // doesn't work for multigroup constraints TODO
89                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
90                                               fvg->vars[latentMap[cell]]->name, fv->name);
91                                 }
92                                 if (a1 == a2 && fv->lbound == NEG_INF) {
93                                         fv->lbound = 1e-6;  // variance must be positive
94                                 }
95                         }
96                 }
97         }
98 }
99
100 static void buildItemParamMap(omxFitFunction* oo, FreeVarGroup *fvg)
101 {
102         BA81FitState *state = (BA81FitState *) oo->argStruct;
103         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
104         omxMatrix *itemParam = state->itemParam;
105         int size = itemParam->cols * state->itemDerivPadSize;
106         state->paramMap = Realloc(NULL, size, int);  // matrix location to free param index
107         for (int px=0; px < size; px++) {
108                 state->paramMap[px] = -1;
109         }
110
111         size_t numFreeParams = state->numItemParam = fvg->vars.size();
112         int *pRow = Realloc(NULL, numFreeParams, int);
113         int *pCol = Realloc(NULL, numFreeParams, int);
114
115         for (size_t px=0; px < numFreeParams; px++) {
116                 pRow[px] = -1;
117                 pCol[px] = -1;
118                 omxFreeVar *fv = fvg->vars[px];
119                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
120                         omxFreeVarLocation *loc = &fv->locations[lx];
121                         int matNum = ~loc->matrix;
122                         if (matNum == itemParam->matrixNumber) {
123                                 pRow[px] = loc->row;
124                                 pCol[px] = loc->col;
125                                 int at = pCol[px] * state->itemDerivPadSize + pRow[px];
126                                 state->paramMap[at] = px;
127
128                                 const double *spec = estate->itemSpec[loc->col];
129                                 int id = spec[RPF_ISpecID];
130                                 double upper, lower;
131                                 (*rpf_model[id].paramBound)(spec, loc->row, &upper, &lower);
132                                 if (fv->lbound == NEG_INF && isfinite(lower)) fv->lbound = lower;
133                                 if (fv->ubound == INF && isfinite(upper)) fv->ubound = upper;
134                         }
135                 }
136         }
137
138         for (size_t p1=0; p1 < numFreeParams; p1++) {
139                 for (size_t p2=p1; p2 < numFreeParams; p2++) {
140                         if (pCol[p1] == -1 || pCol[p1] != pCol[p2]) continue;
141                         const double *spec = estate->itemSpec[pCol[p1]];
142                         int id = spec[RPF_ISpecID];
143                         int numParam = (*rpf_model[id].numParam)(spec);
144                         int r1 = pRow[p1];
145                         int r2 = pRow[p2];
146                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
147                         int rowOffset = 0;
148                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
149                         int at = pCol[p1] * state->itemDerivPadSize + numParam + rowOffset + r1;
150                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
151                         if (p2 != p1) state->NAtriangle.push_back(p2 * numFreeParams + p1);
152                 }
153         }
154
155         Free(pRow);
156         Free(pCol);
157 }
158
159 OMXINLINE static double
160 ba81Fit1Ordinate(omxFitFunction* oo, const int *quad, const double *weight, int want, double *myDeriv)
161 {
162         BA81FitState *state = (BA81FitState*) oo->argStruct;
163         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
164         omxMatrix *itemParam = state->itemParam;
165         int numItems = itemParam->cols;
166         int maxOutcomes = estate->maxOutcomes;
167         int maxDims = estate->maxDims;
168         int do_fit = want & FF_COMPUTE_FIT;
169         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
170
171         double where[maxDims];
172         pointToWhere(estate, quad, where, maxDims);
173
174         double *outcomeProb = NULL;
175         if (do_fit) {
176                 outcomeProb = computeRPF(estate, itemParam, quad); // avoid malloc/free? TODO
177                 if (!outcomeProb) return 0;
178         }
179
180         double thr_ll = 0;
181         for (int ix=0; ix < numItems; ix++) {
182                 const double *spec = estate->itemSpec[ix];
183                 int id = spec[RPF_ISpecID];
184                 int iOutcomes = spec[RPF_ISpecOutcomes];
185
186                 double area = exp(logAreaProduct(estate, quad, estate->Sgroup[ix]));   // avoid exp() here? TODO
187                 if (do_fit) {
188                         for (int ox=0; ox < iOutcomes; ox++) {
189 #if 0
190 #pragma omp critical(ba81Fit1OrdinateDebug1)
191                                 if (!std::isfinite(outcomeProb[ix * maxOutcomes + ox])) {
192                                         pda(itemParam->data, itemParam->rows, itemParam->cols);
193                                         pda(outcomeProb, outcomes, numItems);
194                                         error("RPF produced NAs");
195                                 }
196 #endif
197                                 double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
198                                 thr_ll += got * area;
199                         }
200                 }
201
202                 if (do_deriv) {
203                         double *iparam = omxMatrixColumn(itemParam, ix);
204                         double *pad = myDeriv + ix * state->itemDerivPadSize;
205                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
206                 }
207                 weight += iOutcomes;
208         }
209
210         Free(outcomeProb);
211
212         return thr_ll;
213 }
214
215 static double
216 ba81ComputeMFit1(omxFitFunction* oo, int want, double *gradient, double *hessian)
217 {
218         BA81FitState *state = (BA81FitState*) oo->argStruct;
219         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
220         omxMatrix *customPrior = state->customPrior;
221         omxMatrix *itemParam = state->itemParam;
222         std::vector<const double*> &itemSpec = estate->itemSpec;   // need c++11 auto here TODO
223         int maxDims = estate->maxDims;
224         const int totalOutcomes = estate->totalOutcomes;
225
226         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
227
228         double ll = 0;
229         if (customPrior) {
230                 omxRecompute(customPrior);
231                 ll = customPrior->data[0];
232                 // need deriv adjustment TODO
233         }
234
235         if (!isfinite(ll)) {
236                 omxPrint(itemParam, "item param");
237                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
238         }
239
240 #pragma omp parallel for num_threads(Global->numThreads)
241         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
242                 //double area = exp(state->priLogQarea[qx]);  // avoid exp() here? TODO
243                 int quad[maxDims];
244                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
245                 double *weight = estate->expected + qx * totalOutcomes;
246                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
247                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want, myDeriv);
248                 
249 #pragma omp atomic
250                 ll += thr_ll;
251         }
252
253         if (gradient) {
254                 double *deriv0 = thrDeriv;
255
256                 int perThread = itemParam->cols * state->itemDerivPadSize;
257                 for (int th=1; th < Global->numThreads; th++) {
258                         double *thrD = thrDeriv + th * perThread;
259                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
260                 }
261
262                 int numItems = itemParam->cols;
263                 for (int ix=0; ix < numItems; ix++) {
264                         const double *spec = itemSpec[ix];
265                         int id = spec[RPF_ISpecID];
266                         double *iparam = omxMatrixColumn(itemParam, ix);
267                         double *pad = deriv0 + ix * state->itemDerivPadSize;
268                         (*rpf_model[id].dLL2)(spec, iparam, pad);
269                 }
270
271                 int numFreeParams = int(state->numItemParam);
272                 int numParams = itemParam->cols * state->itemDerivPadSize;
273                 for (int ox=0; ox < numParams; ox++) {
274                         int to = state->paramMap[ox];
275                         if (to == -1) continue;
276
277                         // Need to check because this can happen if
278                         // lbounds/ubounds are not set appropriately.
279                         if (0 && !isfinite(deriv0[ox])) {
280                                 int item = ox / itemParam->rows;
281                                 mxLog("item parameters:\n");
282                                 const double *spec = itemSpec[item];
283                                 int id = spec[RPF_ISpecID];
284                                 int numParam = (*rpf_model[id].numParam)(spec);
285                                 double *iparam = omxMatrixColumn(itemParam, item);
286                                 pda(iparam, numParam, 1);
287                                 // Perhaps bounds can be pulled in from librpf? TODO
288                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
289                                       ox, item, deriv0[ox]);
290                         }
291
292                         if (to < numFreeParams) {
293                                 gradient[to] -= deriv0[ox];
294                         } else {
295                                 hessian[to - numFreeParams] -= deriv0[ox];
296                         }
297                 }
298         }
299
300         Free(thrDeriv);
301
302         return -ll;
303 }
304
305 static void
306 moveLatentDistribution(omxFitFunction *oo, FitContext *fc,
307                        double *ElatentMean, double *ElatentCov)
308 {
309         BA81FitState *state = (BA81FitState*) oo->argStruct;
310         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
311         std::vector<const double*> &itemSpec = estate->itemSpec;   // need c++11 auto here TODO
312         omxMatrix *itemParam = state->itemParam;
313         omxMatrix *design = estate->design;
314         double *tmpLatentMean = state->tmpLatentMean;
315         double *tmpLatentCov = state->tmpLatentCov;
316         int maxDims = estate->maxDims;
317         int maxAbilities = estate->maxAbilities;
318
319         int numItems = itemParam->cols;
320         for (int ix=0; ix < numItems; ix++) {
321                 const double *spec = itemSpec[ix];
322                 int id = spec[RPF_ISpecID];
323                 const double *rawDesign = omxMatrixColumn(design, ix);
324                 int idesign[design->rows];
325                 int idx = 0;
326                 for (int dx=0; dx < design->rows; dx++) {
327                         if (isfinite(rawDesign[dx])) {
328                                 idesign[idx++] = rawDesign[dx]-1;
329                         } else {
330                                 idesign[idx++] = -1;
331                         }
332                 }
333                 for (int d1=0; d1 < idx; d1++) {
334                         if (idesign[d1] == -1) {
335                                 tmpLatentMean[d1] = 0;
336                         } else {
337                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
338                         }
339                         for (int d2=0; d2 <= d1; d2++) {
340                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
341                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
342                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
343                                 } else {
344                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
345                                 }
346                         }
347                 }
348                 if (1) {  // ease debugging, make optional TODO
349                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
350                         for (int d1=0; d1 < maxDims; d1++) {
351                                 for (int d2=0; d2 < maxDims; d2++) {
352                                         if (d1 < idx && d2 < idx) continue;
353                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
354                                 }
355                         }
356                 }
357                 double *iparam = omxMatrixColumn(itemParam, ix);
358                 int *mask = state->paramMap + state->itemDerivPadSize * ix;
359                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
360         }
361
362         int numFreeParams = int(fc->varGroup->vars.size());
363         for (int rx=0; rx < itemParam->rows; rx++) {
364                 for (int cx=0; cx < itemParam->cols; cx++) {
365                         int vx = state->paramMap[cx * state->itemDerivPadSize + rx];
366                         if (vx >= 0 && vx < numFreeParams) {
367                                 fc->est[vx] = omxMatrixElement(itemParam, rx, cx);
368                         }
369                 }
370         }
371 }
372
373 static void
374 schilling_bock_2005_rescale(omxFitFunction *oo, FitContext *fc)
375 {
376         BA81FitState *state = (BA81FitState*) oo->argStruct;
377         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
378         double *ElatentMean = estate->ElatentMean;
379         double *ElatentCov = estate->ElatentCov;
380         int maxAbilities = estate->maxAbilities;
381
382         //mxLog("schilling bock\n");
383         //pda(ElatentMean, maxAbilities, 1);
384         //pda(ElatentCov, maxAbilities, maxAbilities);
385         //omxPrint(design, "design");
386
387         // use omxDPOTRF instead? TODO
388         const char triangle = 'L';
389         F77_CALL(dpotrf)(&triangle, &maxAbilities, ElatentCov, &maxAbilities, &state->choleskyError);
390         if (state->choleskyError != 0) {
391                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
392                 return;
393         }
394
395         moveLatentDistribution(oo, fc, ElatentMean, ElatentCov);
396 }
397
398 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
399 {}
400
401 // can use same reorganization to avoid the gram product of where for every pattern TODO
402 static void mapLatentDeriv(BA81FitState *state, BA81Expect *estate, int sgroup, double piece,
403                            const std::vector<double> &derivCoef,
404                            double *derivOut)
405 {
406         int maxAbilities = estate->maxAbilities;
407         int maxDims = estate->maxDims;
408         int pmax = maxDims;
409         if (estate->numSpecific) pmax -= 1;
410
411         if (sgroup == 0) {
412                 int cx = 0;
413                 for (int d1=0; d1 < pmax; ++d1) {
414                         double amt1 = piece * derivCoef[d1];
415 #pragma omp atomic
416                         derivOut[d1] += amt1;
417                         for (int d2=0; d2 <= d1; ++d2) {
418                                 int to = maxAbilities + cx;
419                                 double amt2 = piece * derivCoef[maxDims + cx];
420 #pragma omp atomic
421                                 derivOut[to] += amt2;
422                                 ++cx;
423                         }
424                 }
425         }
426
427         if (estate->numSpecific) {
428                 int sdim = pmax + sgroup;
429                 double amt3 = piece * derivCoef[pmax];
430 #pragma omp atomic
431                 derivOut[sdim] += amt3;
432
433                 double amt4 = piece * derivCoef[maxDims + triangleLoc0(pmax)];
434                 int to = maxAbilities + triangleLoc0(sdim);
435 #pragma omp atomic
436                 derivOut[to] += amt4;
437         }
438 }
439
440 static void gramProduct(double *vec, size_t len, double *out)
441 {
442         int cell = 0;
443         for (size_t v1=0; v1 < len; ++v1) {
444                 for (size_t v2=0; v2 <= v1; ++v2) {
445                         out[cell] = vec[v1] * vec[v2];
446                         ++cell;
447                 }
448         }
449 }
450
451 static double *reduceForSpecific(omxMatrix *mat, int maxDims, int sgroup)
452 {
453         double *out = Calloc(maxDims * maxDims, double);
454         for (int d1=0; d1 < maxDims-1; ++d1) {
455                 int cell = d1 * maxDims;
456                 for (int d2=0; d2 < maxDims-1; ++d2) {
457                         out[cell + d2] = omxMatrixElement(mat, d1, d2);
458                 }
459         }
460         int sloc = maxDims-1 + sgroup;
461         out[maxDims*maxDims - 1] = omxMatrixElement(mat, sloc, sloc);
462         return out;
463 }
464
465 static void calcDerivCoef(BA81FitState *state, BA81Expect *estate,
466                           double *where, int sgroup, std::vector<double> *derivCoef)
467 {
468         omxMatrix *mean = estate->latentMeanOut;
469         omxMatrix *cov = estate->latentCovOut;
470         omxMatrix *icov = state->icov;
471         double *covData = cov->data;
472         double *icovData = icov->data;
473         int maxDims = estate->maxDims;
474         const char R='R';
475         const char L='L';
476         const char U='U';
477         const double alpha = 1;
478         const double beta = 0;
479         const int one = 1;
480
481         double *scov = NULL;
482         double *sicov = NULL;
483         if (estate->numSpecific) {
484                 scov = reduceForSpecific(cov, maxDims, sgroup);
485                 covData = scov;
486
487                 sicov = reduceForSpecific(icov, maxDims, sgroup);
488                 icovData = sicov;
489         }
490
491         std::vector<double> whereDiff(maxDims);
492         std::vector<double> whereGram(triangleLoc1(maxDims));
493         for (int d1=0; d1 < maxDims; ++d1) {
494                 whereDiff[d1] = where[d1] - omxVectorElement(mean, d1);
495         }
496         gramProduct(whereDiff.data(), whereDiff.size(), whereGram.data());
497
498         F77_CALL(dsymv)(&U, &maxDims, &alpha, icovData, &maxDims, whereDiff.data(), &one,
499                         &beta, derivCoef->data(), &one);
500
501         std::vector<double> covGrad1(maxDims * maxDims);
502         std::vector<double> covGrad2(maxDims * maxDims);
503
504         int cx=0;
505         for (int d1=0; d1 < maxDims; ++d1) {
506                 for (int d2=0; d2 <= d1; ++d2) {
507                         covGrad1[d2 * maxDims + d1] = covData[d2 * maxDims + d1] - whereGram[cx];
508                         ++cx;
509                 }
510         }
511
512         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, covGrad1.data(), &maxDims, icovData,
513                         &maxDims, &beta, covGrad2.data(), &maxDims);
514         F77_CALL(dsymm)(&R, &L, &maxDims, &maxDims, &alpha, icovData, &maxDims, covGrad2.data(),
515                         &maxDims, &beta, covGrad1.data(), &maxDims);
516
517         for (int d1=0; d1 < maxDims; ++d1) {
518                 covGrad1[d1 * maxDims + d1] /= 2.0;
519         }
520
521         cx = maxDims;
522         for (int d1=0; d1 < maxDims; ++d1) {
523                 int cell = d1 * maxDims;
524                 for (int d2=0; d2 <= d1; ++d2) {
525                         (*derivCoef)[cx] = -covGrad1[cell + d2];
526                         ++cx;
527                 }
528         }
529
530         Free(scov);
531         Free(sicov);
532 }
533
534 static bool latentDeriv(omxFitFunction *oo, double *gradient)
535 {
536         omxExpectation *expectation = oo->expectation;
537         BA81FitState *state = (BA81FitState*) oo->argStruct;
538         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
539         int numUnique = estate->numUnique;
540         int numSpecific = estate->numSpecific;
541         int maxDims = estate->maxDims;
542         int maxAbilities = estate->maxAbilities;
543         int primaryDims = maxDims;
544         omxMatrix *cov = estate->latentCovOut;
545         int *numIdentical = estate->numIdentical;
546         double *patternLik = estate->patternLik;
547
548         OMXZERO(patternLik, numUnique);
549         Free(estate->_logPatternLik);
550
551         omxCopyMatrix(state->icov, cov);
552
553         int info;
554         omxDPOTRF(state->icov, &info);
555         if (info != 0) {
556                 if (info < 0) error("dpotrf invalid argument %d", -info);
557                 return FALSE;
558         }
559         omxDPOTRI(state->icov, &info);
560         if (info != 0) {
561                 if (info < 0) error("dpotri invalid argument %d", -info);
562                 return FALSE;
563         }
564         // fill in rest from upper triangle
565         for (int rx=1; rx < maxAbilities; ++rx) {
566                 for (int cx=0; cx < rx; ++cx) {
567                         omxSetMatrixElement(state->icov, rx, cx, omxMatrixElement(state->icov, cx, rx));
568                 }
569         }
570
571         int maxDerivCoef = maxDims + triangleLoc1(maxDims);
572         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
573         double *uniqueDeriv = Calloc(numUnique * numLatents, double);
574
575         if (numSpecific == 0) {
576 #pragma omp parallel for num_threads(Global->numThreads)
577                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
578                         int quad[maxDims];
579                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
580                         double where[maxDims];
581                         pointToWhere(estate, quad, where, maxDims);
582                         std::vector<double> derivCoef(maxDerivCoef);
583                         calcDerivCoef(state, estate, where, 0, &derivCoef);
584                         double logArea = estate->priLogQarea[qx];
585                         double *lxk = ba81LikelihoodFast(expectation, 0, quad);
586
587                         for (int px=0; px < numUnique; px++) {
588                                 double tmp = exp(lxk[px] + logArea);
589 #pragma omp atomic
590                                 patternLik[px] += tmp;
591                                 mapLatentDeriv(state, estate, 0, tmp, derivCoef,
592                                                uniqueDeriv + px * numLatents);
593                         }
594                 }
595         } else {
596                 primaryDims -= 1;
597                 int sDim = primaryDims;
598                 long specificPoints = estate->quadGridSize;
599
600 #pragma omp parallel for num_threads(Global->numThreads)
601                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
602                         int quad[maxDims];
603                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
604
605                         cai2010(expectation, FALSE, quad);
606
607                         for (long sx=0; sx < specificPoints; sx++) {
608                                 quad[sDim] = sx;
609                                 double where[maxDims];
610                                 pointToWhere(estate, quad, where, maxDims);
611                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
612                                         std::vector<double> derivCoef(maxDerivCoef);
613                                         calcDerivCoef(state, estate, where, sgroup, &derivCoef);
614                                         double logArea = logAreaProduct(estate, quad, sgroup);
615                                         double *lxk = ba81LikelihoodFast(expectation, sgroup, quad);
616                                         for (int px=0; px < numUnique; px++) {
617                                                 double Ei = estate->allElxk[eIndex(estate, px)];
618                                                 double Eis = estate->Eslxk[esIndex(estate, sgroup, px)];
619                                                 double tmp = exp((Ei - Eis) + lxk[px] + logArea);
620                                                 mapLatentDeriv(state, estate, sgroup, tmp, derivCoef,
621                                                                uniqueDeriv + px * numLatents);
622                                         }
623                                 }
624                         }
625
626                         double priLogArea = estate->priLogQarea[qx];
627                         for (int px=0; px < numUnique; px++) {
628                                 double Ei = estate->allElxk[eIndex(estate, px)];
629                                 double tmp = exp(Ei + priLogArea);
630 #pragma omp atomic
631                                 patternLik[px] += tmp;
632                         }
633                 }
634         }
635
636         /*
637         std::vector<double> hess1(triangleLoc1(numLatents));
638         std::vector<double> hessSum(triangleLoc1(numLatents));
639
640         // could run nicely in parallel with numUnique * triangleLoc(numLatents) buffer
641         for (int px=0; px < numUnique; ++px) {
642                 gramProduct(uniqueDeriv + px * numLatents, numLatents, hess1.data());
643                 double dups = numIdentical[px];
644                 for (int rx=0; rx < triangleLoc1(numLatents); ++rx) {
645                         hessSum[rx] += hess1[rx] * dups;
646                 }
647         }
648         */
649
650 #pragma omp parallel for num_threads(Global->numThreads)
651         for (int px=0; px < numUnique; ++px) {
652                 double weight = numIdentical[px] / patternLik[px];
653                 for (int rx=0; rx < numLatents; ++rx) {
654                         uniqueDeriv[px * numLatents + rx] *= weight;
655                 }
656         }
657
658 #pragma omp parallel for num_threads(Global->numThreads)
659         for (int rx=0; rx < numLatents; ++rx) {
660                 for (int px=1; px < numUnique; ++px) {
661                         uniqueDeriv[rx] += uniqueDeriv[px * numLatents + rx];
662                 }
663         }
664
665         for (int l1=0; l1 < numLatents; ++l1) {
666                 int t1 = state->latentMap[l1];
667                 if (t1 < 0) continue;
668                 gradient[t1] -= 2 * uniqueDeriv[l1];
669
670                 /*
671                 for (int l2=0; l2 <= l1; ++l2) {
672                         int t2 = state->latentMap[l2];
673                         if (t2 < 0) continue;
674                         hessian[numLatents * t1 + t2] -= 2 * hessSum[triangleLoc1(l1) + l2];
675                 }
676                 */
677         }
678
679         Free(uniqueDeriv);
680
681         return TRUE;
682 }
683
684 static void recomputePatternLik(omxFitFunction *oo)
685 {
686         omxExpectation *expectation = oo->expectation;
687         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
688         int numUnique = estate->numUnique;
689         int numSpecific = estate->numSpecific;
690         int maxDims = estate->maxDims;
691         int primaryDims = maxDims;
692         double *patternLik = estate->patternLik;
693
694         if (!patternLik) {
695                 ba81Estep1(oo->expectation);
696                 return;
697         }
698
699         OMXZERO(patternLik, numUnique);
700         Free(estate->_logPatternLik);
701
702         if (numSpecific == 0) {
703 #pragma omp parallel for num_threads(Global->numThreads)
704                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
705                         int quad[maxDims];
706                         decodeLocation(qx, maxDims, estate->quadGridSize, quad);
707                         double where[maxDims];
708                         pointToWhere(estate, quad, where, maxDims);
709                         double logArea = estate->priLogQarea[qx];
710                         double *lxk = ba81LikelihoodFast(expectation, 0, quad);
711
712                         for (int px=0; px < numUnique; px++) {
713                                 double tmp = exp(lxk[px] + logArea);
714 #pragma omp atomic
715                                 patternLik[px] += tmp;
716                         }
717                 }
718         } else {
719                 primaryDims -= 1;
720
721 #pragma omp parallel for num_threads(Global->numThreads)
722                 for (long qx=0; qx < estate->totalPrimaryPoints; qx++) {
723                         int quad[maxDims];
724                         decodeLocation(qx, primaryDims, estate->quadGridSize, quad);
725
726                         cai2010(expectation, FALSE, quad);
727
728                         double priLogArea = estate->priLogQarea[qx];
729                         for (int px=0; px < numUnique; px++) {
730                                 double Ei = estate->allElxk[eIndex(estate, px)];
731                                 double tmp = exp(Ei + priLogArea);
732 #pragma omp atomic
733                                 patternLik[px] += tmp;
734                         }
735                 }
736         }
737 }
738
739 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
740 {
741         BA81FitState *state = (BA81FitState*) oo->argStruct;
742         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
743
744         std::vector<int> &latentMap = state->latentMap;
745         if (!latentMap.size()) buildLatentParamMap(oo, fc->varGroup);
746
747         double *ElatentMean = estate->ElatentMean;
748         double *ElatentCov = estate->ElatentCov;
749         int maxAbilities = estate->maxAbilities;
750
751         for (int a1 = 0; a1 < maxAbilities; ++a1) {
752                 if (latentMap[a1] >= 0) {
753                         int to = latentMap[a1];
754                         fc->est[to] = ElatentMean[a1];
755                 }
756
757                 for (int a2 = 0; a2 <= a1; ++a2) {
758                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
759                         if (to < 0) continue;
760                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
761                 }
762         }
763         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
764 }
765
766 static double
767 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
768 {
769         BA81FitState *state = (BA81FitState*) oo->argStruct;
770         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
771
772         ++state->fitCount;
773
774         if (estate->type == EXPECTATION_AUGMENTED) {
775                 if (!state->paramMap) buildItemParamMap(oo, fc->varGroup);
776
777                 if (want & FF_COMPUTE_PREOPTIMIZE) {
778                         if (!state->paramMap) buildItemParamMap(oo, fc->varGroup);
779                         schilling_bock_2005_rescale(oo, fc); // how does this work in multigroup? TODO
780                         return 0;
781                 }
782
783                 if (want & FF_COMPUTE_POSTOPTIMIZE) {
784                         omxForceCompute(estate->EitemParam);
785                         ba81Estep1(oo->expectation);
786                         return 0;
787                 }
788
789                 if (want & FF_COMPUTE_GRADIENT) ++state->gradientCount;
790
791                 for (size_t nx=0; nx < state->NAtriangle.size(); ++nx) {
792                         fc->hess[ state->NAtriangle[nx] ] = nan("symmetric");
793                 }
794
795                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
796                 double got = ba81ComputeMFit1(oo, want, fc->grad, fc->hess);
797                 return got;
798         } else if (estate->type == EXPECTATION_OBSERVED) {
799                 if (state->latentMap.size() == 0) buildLatentParamMap(oo, fc->varGroup);
800
801                 omxExpectation *expectation = oo->expectation;
802
803                 if (want & FF_COMPUTE_PREOPTIMIZE) {
804                         setLatentStartingValues(oo, fc);
805                         return 0;
806                 }
807
808                 if (want & FF_COMPUTE_GRADIENT) {
809                         ba81SetupQuadrature(expectation, estate->targetQpoints, 0);
810                         ba81buildLXKcache(expectation);
811                         if (!latentDeriv(oo, fc->grad)) {
812                                 return INFINITY;
813                         }
814                 }
815
816                 if (want & FF_COMPUTE_HESSIAN) {
817                         warning("%s: Hessian is not available for latent distribution parameters", NAME);
818                 }
819
820                 if (want & FF_COMPUTE_FIT) {
821                         if (!(want & FF_COMPUTE_GRADIENT)) {
822                                 ba81SetupQuadrature(expectation, estate->targetQpoints, 0);
823                                 recomputePatternLik(oo);
824                         }
825                         double *logPatternLik = getLogPatternLik(expectation);
826                         int *numIdentical = estate->numIdentical;
827                         int numUnique = estate->numUnique;
828                         double got = 0;
829                         for (int ux=0; ux < numUnique; ux++) {
830                                 got += numIdentical[ux] * logPatternLik[ux];
831                         }
832                         //mxLog("fit %.2f", -2 * got);
833                         return -2 * got;
834                 }
835
836                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
837
838                 return 0;
839         } else {
840                 error("Confused");
841         }
842 }
843
844 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
845 {
846         if (!want) return;
847         double got = ba81ComputeFit(oo, want, fc);
848         if (got) oo->matrix->data[0] = got;
849 }
850
851 BA81FitState::~BA81FitState()
852 {
853         omxFreeAllMatrixData(customPrior);
854         Free(paramMap);
855         Free(tmpLatentMean);
856         Free(tmpLatentCov);
857         omxFreeAllMatrixData(itemParam);
858         omxFreeAllMatrixData(icov);
859 }
860
861 static void ba81Destroy(omxFitFunction *oo) {
862         BA81FitState *state = (BA81FitState *) oo->argStruct;
863         delete state;
864 }
865
866 void omxInitFitFunctionBA81(omxFitFunction* oo)
867 {
868         if (!oo->argStruct) { // ugh!
869                 BA81FitState *state = new BA81FitState;
870                 oo->argStruct = state;
871         }
872
873         BA81FitState *state = (BA81FitState*) oo->argStruct;
874         SEXP rObj = oo->rObj;
875
876         omxExpectation *expectation = oo->expectation;
877         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
878
879         //newObj->data = oo->expectation->data;
880
881         oo->computeFun = ba81Compute;
882         oo->setVarGroup = ba81SetFreeVarGroup;
883         oo->destructFun = ba81Destroy;
884         oo->gradientAvailable = TRUE;
885         oo->hessianAvailable = TRUE;
886
887         state->itemParam =
888                 omxNewMatrixFromSlot(rObj, globalState, "ItemParam");
889
890         if (estate->EitemParam->rows != state->itemParam->rows ||
891             estate->EitemParam->cols != state->itemParam->cols) {
892                 error("ItemParam and EItemParam must be of the same dimension");
893         }
894
895         state->customPrior =
896                 omxNewMatrixFromSlot(rObj, globalState, "CustomPrior");
897         
898         int maxParam = state->itemParam->rows;
899         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
900
901         int maxAbilities = estate->maxAbilities;
902
903         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
904         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
905
906         int numItems = state->itemParam->cols;
907         for (int ix=0; ix < numItems; ix++) {
908                 const double *spec = estate->itemSpec[ix];
909                 int id = spec[RPF_ISpecID];
910                 if (id < 0 || id >= rpf_numModels) {
911                         error("ItemSpec %d has unknown item model %d", ix, id);
912                 }
913         }
914
915         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
916 }