ifa: No point in *2 the EM LL; that's just for fitting
[openmx:openmx.git] / src / omxExpectationBA81.c
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 // Consider replacing log() with log2() in some places? Not worth it?
19
20 #include <Rmath.h>
21 #include "omxExpectation.h"
22 #include "omxOpenmpWrap.h"
23 #include "npsolWrap.h"
24 #include "libirt-rpf.h"
25 #include "omxOptimizer.h"  // remove TODO
26 #include "dmvnorm.h"
27
28 static const char *NAME = "ExpectationBA81";
29
30 static const struct rpf *rpf_model = NULL;
31 static int rpf_numModels;
32 static const double MIN_PATTERNLIK = 1e-100;
33
34 typedef struct {
35
36         // data characteristics
37         omxData *data;
38         int numUnique;
39         int *numIdentical;        // length numUnique
40         double *logNumIdentical;  // length numUnique
41         int *rowMap;              // length numUnique
42
43         // item description related
44         omxMatrix *itemSpec;
45         int maxOutcomes;
46         int maxDims;
47         int maxAbilities;
48         int numSpecific;
49         int *Sgroup;              // item's specific group 0..numSpecific-1
50         omxMatrix *design;        // items * maxDims
51
52         // quadrature related
53         double Qwidth;
54         double targetQpoints;
55         long quadGridSize;
56         long totalQuadPoints;     // quadGridSize ^ maxDims
57         long totalPrimaryPoints;  // totalQuadPoints except for specific dim TODO
58         double *Qpoint;           // quadGridSize
59         double *priLogQarea;      // totalPrimaryPoints
60         double *speLogQarea;      // quadGridSize * numSpecific
61
62         // estimation related
63         omxMatrix *EitemParam;    // E step version
64         int userEitemParam;
65         int rescale;
66         omxMatrix *itemParam;     // M step version
67         omxMatrix *customPrior;   // TODO remove?
68         int derivPadSize;         // maxParam + maxParam*(1+maxParam)/2
69         double *thrDeriv;         // itemParam->cols * derivPadSize * thread
70         int *paramMap;            // itemParam->cols * derivPadSize -> index of free parameter
71         int cacheLXK;
72         double *lxk;              // wo/cache, numUnique * thread
73         double *allSlxk;          // numUnique * thread
74         double *Slxk;             // numUnique * #specific dimensions * thread
75         double *patternLik;       // numUnique
76         int totalOutcomes;
77         double *expected;         // totalOutcomes * totalQuadPoints
78         double *latentMean;       // maxAbilities * numUnique
79         double *latentCov;        // maxAbilities * maxAbilities * numUnique ; only lower triangle is used
80         double *latentMean1;      // maxDims
81         double *latentCov1;       // maxDims * maxDims ; only lower triangle is used
82         double *latentMeanOut;    // maxAbilities
83         double *latentCovOut;     // maxAbilities * maxAbilities ; only lower triangle is used
84         // also need corresponding label matrices for equating
85         int *freeMean;
86         int *freeCov;
87         int choleskyError;
88
89         int gradientCount;
90         int fitCount;
91         double lastEMLL;
92 } omxBA81State;
93
94 static void
95 pda(const double *ar, int rows, int cols) {   // column major order
96         for (int rx=0; rx < rows; rx++) {
97                 for (int cx=0; cx < cols; cx++) {
98                         Rprintf("%.6g, ", ar[cx * rows + rx]);
99                 }
100                 Rprintf("\n");
101         }
102 }
103
104 static void
105 pia(const int *ar, int rows, int cols) {   // column major order
106         for (int rx=0; rx < rows; rx++) {
107                 for (int cx=0; cx < cols; cx++) {
108                         Rprintf("%d, ", ar[cx * rows + rx]);
109                 }
110                 Rprintf("\n");
111         }
112 }
113
114 static int
115 getNumThreads(omxExpectation* oo)
116 {
117         int numThreads = oo->currentState->numThreads;
118         if (numThreads < 1) numThreads = 1;
119         return numThreads;
120 }
121
122 static void buildParamMap(omxExpectation* oo)
123 {
124         omxState* currentState = oo->currentState;
125         omxBA81State *state = (omxBA81State *) oo->argStruct;
126         omxMatrix *itemParam = state->itemParam;
127         int size = itemParam->cols * state->derivPadSize;
128
129         state->paramMap = Realloc(NULL, size, int);
130         for (int px=0; px < size; px++) {
131                 state->paramMap[px] = -1;
132         }
133
134         int numFreeParams = currentState->numFreeParams;
135         int *pRow = Realloc(NULL, numFreeParams, int);
136         int *pCol = Realloc(NULL, numFreeParams, int);
137
138         for (int px=0; px < numFreeParams; px++) {
139                 omxFreeVar *fv = currentState->freeVarList + px;
140                 for (int lx=0; lx < fv->numLocations; lx++) {
141                         if (~fv->matrices[lx] == itemParam->matrixNumber) {
142                                 pRow[px] = fv->row[lx];
143                                 pCol[px] = fv->col[lx];
144                                 int at = pCol[px] * state->derivPadSize + pRow[px];
145                                 state->paramMap[at] = px;
146                         }
147                 }
148         }
149
150         for (int p1=0; p1 < numFreeParams; p1++) {
151                 for (int p2=p1; p2 < numFreeParams; p2++) {
152                         if (pCol[p1] != pCol[p2]) continue;
153                         const double *spec = omxMatrixColumn(state->itemSpec, pCol[p1]);
154                         int id = spec[RPF_ISpecID];
155                         int numParam = (*rpf_model[id].numParam)(spec);
156                         int r1 = pRow[p1];
157                         int r2 = pRow[p2];
158                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
159                         int rowOffset = 0;
160                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
161                         int at = pCol[p1] * state->derivPadSize + numParam + rowOffset + r1;
162                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
163                 }
164         }
165
166         Free(pRow);
167         Free(pCol);
168
169         state->thrDeriv = Realloc(NULL, itemParam->cols * state->derivPadSize * getNumThreads(oo), double);
170 }
171
172 OMXINLINE static void
173 pointToWhere(omxBA81State *state, const int *quad, double *where, int upto)
174 {
175         for (int dx=0; dx < upto; dx++) {
176                 where[dx] = state->Qpoint[quad[dx]];
177         }
178 }
179
180 OMXINLINE static void
181 assignDims(omxMatrix *itemSpec, omxMatrix *design, int dims, int maxDims, int ix,
182            const double *restrict theta, double *restrict ptheta)
183 {
184         for (int dx=0; dx < dims; dx++) {
185                 int ability = (int)omxMatrixElement(design, dx, ix) - 1;
186                 if (ability >= maxDims) ability = maxDims-1;
187                 ptheta[dx] = theta[ability];
188         }
189 }
190
191 /**
192  * \param theta Vector of ability parameters, one per ability
193  * \returns A numItems by maxOutcomes colMajor vector of doubles. Caller must Free it.
194  */
195 static double *
196 computeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
197 {
198         omxBA81State *state = (omxBA81State*) oo->argStruct;
199         omxMatrix *itemSpec = state->itemSpec;
200         int numItems = itemSpec->cols;
201         omxMatrix *design = state->design;
202         int maxDims = state->maxDims;
203
204         double theta[maxDims];
205         pointToWhere(state, quad, theta, maxDims);
206
207         double *outcomeProb = Realloc(NULL, numItems * state->maxOutcomes, double);
208         //double *outcomeProb = Calloc(numItems * state->maxOutcomes, double);
209
210         for (int ix=0; ix < numItems; ix++) {
211                 const double *spec = omxMatrixColumn(itemSpec, ix);
212                 double *iparam = omxMatrixColumn(itemParam, ix);
213                 double *out = outcomeProb + ix * state->maxOutcomes;
214                 int id = spec[RPF_ISpecID];
215                 int dims = spec[RPF_ISpecDims];
216                 double ptheta[dims];
217                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta);
218                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
219 #if 0
220                 for (int ox=0; ox < spec[RPF_ISpecOutcomes]; ox++) {
221                         if (!isfinite(out[ox]) || out[ox] > 0) {
222                                 Rprintf("spec\n");
223                                 pda(spec, itemSpec->rows, 1);
224                                 Rprintf("item param\n");
225                                 pda(iparam, itemParam->rows, 1);
226                                 Rprintf("where\n");
227                                 pda(ptheta, dims, 1);
228                                 error("RPF returned %20.20f", out[ox]);
229                         }
230                 }
231 #endif
232         }
233
234         return outcomeProb;
235 }
236
237 OMXINLINE static long
238 encodeLocation(const int dims, const long grid, const int *restrict quad)
239 {
240         long qx = 0;
241         for (int dx=dims-1; dx >= 0; dx--) {
242                 qx = qx * grid;
243                 qx += quad[dx];
244         }
245         return qx;
246 }
247
248 OMXINLINE static double *
249 getLXKcache(omxBA81State *state, const int *quad, const int specific)
250 {
251         long ordinate;
252         if (state->numSpecific == 0) {
253                 ordinate = encodeLocation(state->maxDims, state->quadGridSize, quad);
254         } else {
255                 ordinate = (specific * state->totalQuadPoints +
256                             encodeLocation(state->maxDims, state->quadGridSize, quad));
257         }
258         return state->lxk + state->numUnique * ordinate;
259 }
260
261 OMXINLINE static double *
262 ba81Likelihood(omxExpectation *oo, int specific, const int *restrict quad)
263 {
264         omxBA81State *state = (omxBA81State*) oo->argStruct;
265         int numUnique = state->numUnique;
266         int maxOutcomes = state->maxOutcomes;
267         omxData *data = state->data;
268         int numItems = state->itemSpec->cols;
269         int *restrict Sgroup = state->Sgroup;
270         double *restrict lxk;
271
272         if (!state->cacheLXK) {
273                 lxk = state->lxk + numUnique * omx_absolute_thread_num();
274         } else {
275                 lxk = getLXKcache(state, quad, specific);
276         }
277
278         const double *outcomeProb = computeRPF(oo, state->EitemParam, quad);
279         if (!outcomeProb) {
280                 OMXZERO(lxk, numUnique);
281                 return lxk;
282         }
283
284         const int *rowMap = state->rowMap;
285         for (int px=0; px < numUnique; px++) {
286                 double lxk1 = 0;
287                 for (int ix=0; ix < numItems; ix++) {
288                         if (specific != Sgroup[ix]) continue;
289                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
290                         if (pick == NA_INTEGER) continue;
291                         double piece = outcomeProb[ix * maxOutcomes + pick-1];  // move -1 elsewhere TODO
292                         lxk1 += piece;
293                 }
294 #if 0
295 #pragma omp critical(ba81LikelihoodDebug1)
296                 if (!isfinite(lxk1) || lxk1 > numItems) {
297                         Rprintf("where\n");
298                         double where[state->maxDims];
299                         pointToWhere(state, quad, where, state->maxDims);
300                         pda(where, state->maxDims, 1);
301                         Rprintf("prob\n");
302                         pda(outcomeProb, numItems, maxOutcomes);
303                         error("Likelihood of row %d is %f", rowMap[px], lxk1);
304                 }
305 #endif
306                 lxk[px] = lxk1;
307         }
308
309         Free(outcomeProb);
310
311         return lxk;
312 }
313
314 OMXINLINE static double *
315 ba81LikelihoodFast(omxExpectation *oo, int specific, const int *restrict quad)
316 {
317         omxBA81State *state = (omxBA81State*) oo->argStruct;
318         if (!state->cacheLXK) {
319                 return ba81Likelihood(oo, specific, quad);
320         } else {
321                 return getLXKcache(state, quad, specific);
322         }
323
324 }
325
326 OMXINLINE static void
327 mapLatentSpace(omxBA81State *state, int px, int sgroup, double piece, const double *where)
328 {
329         double *latentMean = state->latentMean;
330         double *latentCov = state->latentCov;
331         int maxDims = state->maxDims;
332         int maxAbilities = state->maxAbilities;
333         int pmax = maxDims;
334         if (state->numSpecific) pmax -= 1;
335
336         if (sgroup == 0) {
337                 for (int d1=0; d1 < pmax; d1++) {
338                         double piece_w1 = piece * where[d1];
339                         int mloc = px * maxAbilities + d1;
340 #pragma omp atomic
341                         latentMean[mloc] += piece_w1;
342                         for (int d2=0; d2 <= d1; d2++) {
343                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
344                                 double piece_cov = piece_w1 * where[d2];
345 #pragma omp atomic
346                                 latentCov[loc] += piece_cov;
347                         }
348                 }
349         }
350
351         if (state->numSpecific) {
352                 int sdim = maxDims + sgroup - 1;
353
354                 double piece_w1 = piece * where[maxDims-1];
355                 int mloc = px * maxAbilities + sdim;
356 #pragma omp atomic
357                 latentMean[mloc] += piece_w1;
358
359                 int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
360                 double piece_var = piece_w1 * where[maxDims-1];
361 #pragma omp atomic
362                 latentCov[loc] += piece_var;
363         }
364 }
365
366 OMXINLINE static double
367 logAreaProduct(omxBA81State *state, const int *restrict quad, const int sg)
368 {
369         int maxDims = state->maxDims;
370         if (state->numSpecific == 0) {
371                 long qloc = encodeLocation(maxDims, state->quadGridSize, quad);
372                 return state->priLogQarea[qloc];
373         } else {
374                 long priloc = encodeLocation(maxDims-1, state->quadGridSize, quad);
375                 return (state->priLogQarea[priloc] +
376                         state->speLogQarea[sg * state->quadGridSize + quad[maxDims - 1]]);
377         }
378 }
379
380 #define CALC_ALLSLXK(state, numUnique) \
381         (state->allSlxk + omx_absolute_thread_num() * (numUnique))
382
383 #define CALC_SLXK(state, numUnique, numSpecific) \
384         (state->Slxk + omx_absolute_thread_num() * (numUnique) * (numSpecific))
385
386 OMXINLINE static void
387 cai2010(omxExpectation* oo, int recompute, const int *restrict primaryQuad,
388         double *restrict allSlxk, double *restrict Slxk)
389 {
390         omxBA81State *state = (omxBA81State*) oo->argStruct;
391         int numUnique = state->numUnique;
392         int numSpecific = state->numSpecific;
393         int maxDims = state->maxDims;
394         int sDim = maxDims-1;
395
396         int quad[maxDims];
397         memcpy(quad, primaryQuad, sizeof(int)*sDim);
398
399         OMXZERO(Slxk, numUnique * numSpecific);
400         OMXZERO(allSlxk, numUnique);
401
402         for (int sx=0; sx < numSpecific; sx++) {
403                 double *eis = Slxk + numUnique * sx;
404                 int quadGridSize = state->quadGridSize;
405
406                 for (int qx=0; qx < quadGridSize; qx++) {
407                         quad[sDim] = qx;
408                         double where[maxDims];
409                         pointToWhere(state, quad, where, maxDims);
410
411                         double *lxk;
412                         if (recompute) {
413                                 lxk = ba81Likelihood(oo, sx, quad);
414                         } else {
415                                 lxk = getLXKcache(state, quad, sx);
416                         }
417
418                         for (int ix=0; ix < numUnique; ix++) {
419                                 eis[ix] += exp(lxk[ix] + state->priLogQarea[qx]);
420                         }
421                 }
422
423                 for (int px=0; px < numUnique; px++) {
424                         eis[px] = log(eis[px]);
425                         allSlxk[px] += eis[px];
426                 }
427         }
428 }
429
430 OMXINLINE static void
431 decodeLocation(long qx, const int dims, const long grid,
432                int *restrict quad)
433 {
434         for (int dx=0; dx < dims; dx++) {
435                 quad[dx] = qx % grid;
436                 qx = qx / grid;
437         }
438 }
439
440 static void
441 ba81Estep1(omxExpectation *oo) {
442         if(OMX_DEBUG_MML) {Rprintf("Beginning %s Computation.\n", NAME);}
443
444         omxBA81State *state = (omxBA81State*) oo->argStruct;
445         double *patternLik = state->patternLik;
446         int numUnique = state->numUnique;
447         int numSpecific = state->numSpecific;
448         double *latentMean = state->latentMean;
449         double *latentCov = state->latentCov;
450         int maxDims = state->maxDims;
451         int maxAbilities = state->maxAbilities;
452         int primaryDims = maxDims;
453
454         OMXZERO(patternLik, numUnique);
455         OMXZERO(latentMean, numUnique * maxAbilities);
456         OMXZERO(latentCov, numUnique * maxAbilities * maxAbilities);
457
458         // E-step, marginalize person ability
459         //
460         // Note: In the notation of Bock & Aitkin (1981) and
461         // Cai~(2010), these loops are reversed.  That is, the inner
462         // loop is over quadrature points and the outer loop is over
463         // all response patterns.
464         //
465         if (numSpecific == 0) {
466 #pragma omp parallel for num_threads(oo->currentState->numThreads)
467                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
468                         int quad[maxDims];
469                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
470                         double where[maxDims];
471                         pointToWhere(state, quad, where, maxDims);
472
473                         double *lxk = ba81Likelihood(oo, 0, quad);
474
475                         double logArea = state->priLogQarea[qx];
476 #pragma omp critical(EstepUpdate)
477                         for (int px=0; px < numUnique; px++) {
478                                 double tmp = exp(lxk[px] + logArea);
479 #if 0
480                                 if (!isfinite(tmp)) {
481                                         Rprintf("where\n");
482                                         pda(where, maxDims, 1);
483                                         error("Row %d lxk %f logArea %f tmp %f",
484                                               state->rowMap[px], lxk[px], logArea, tmp);
485                                 }
486 #endif
487                                 patternLik[px] += tmp;
488                                 mapLatentSpace(state, px, 0, tmp, where);
489                         }
490                 }
491         } else {
492                 primaryDims -= 1;
493                 int sDim = primaryDims;
494                 long specificPoints = state->quadGridSize;
495
496 #pragma omp parallel for num_threads(oo->currentState->numThreads)
497                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
498                         int quad[maxDims];
499                         decodeLocation(qx, primaryDims, state->quadGridSize, quad);
500
501                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
502                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
503                         cai2010(oo, TRUE, quad, allSlxk, Slxk);
504
505                         for (int sgroup=0; sgroup < numSpecific; sgroup++) {
506                                 double *eis = Slxk + numUnique * sgroup;
507                                 for (long sx=0; sx < specificPoints; sx++) {
508                                         quad[sDim] = sx;
509                                         double where[maxDims];
510                                         pointToWhere(state, quad, where, maxDims);
511                                         double logArea = logAreaProduct(state, quad, sgroup);
512                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
513                                         for (int px=0; px < numUnique; px++) {
514                                                 double tmp = exp((allSlxk[px] - eis[px]) + lxk[px] + logArea);
515                                                 mapLatentSpace(state, px, sgroup, tmp, where);
516                                         }
517                                 }
518                         }
519
520                         double priLogArea = state->priLogQarea[qx];
521 #pragma omp critical(EstepUpdate)
522                         for (int px=0; px < numUnique; px++) {
523                                 double tmp = exp(allSlxk[px] + priLogArea);
524                                 patternLik[px] += tmp;  // is it faster to make this line atomic? TODO
525                         }
526                 }
527         }
528
529         int *numIdentical = state->numIdentical;
530
531         if(0) {
532                 Rprintf("weight\n");
533                 for (int px=0; px < numUnique; px++) {
534                         double weight = numIdentical[px] / patternLik[px];
535                         Rprintf("%20.20f\n", weight);
536                 }
537
538                 Rprintf("per item mean\n");
539                 for (int px=0; px < numUnique; px++) {
540                         Rprintf("[%d] %20.20f\n", px, latentMean[px * maxAbilities]);
541                 }
542         }
543
544         for (int px=0; px < numUnique; px++) {
545                 if (patternLik[px] < MIN_PATTERNLIK) {
546                         patternLik[px] = MIN_PATTERNLIK;
547                         warning("Likelihood of pattern %d is 0, forcing to %.3g",
548                                 px, MIN_PATTERNLIK);
549                 }
550
551                 double weight = numIdentical[px] / patternLik[px];
552                 for (int d1=0; d1 < primaryDims; d1++) {
553                         latentMean[px * maxAbilities + d1] *= weight;
554                         for (int d2=0; d2 <= d1; d2++) {
555                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
556                                 latentCov[loc] *= weight;
557                         }
558                 }
559                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
560                         latentMean[px * maxAbilities + sdim] *= weight;
561                         int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
562                         latentCov[loc] *= weight;
563                 }
564 #if 0
565                 if (!isfinite(patternLik[px])) {
566                         error("Likelihood of row %d is %f", state->rowMap[px], patternLik[px]);
567                 }
568 #endif
569                 patternLik[px] = log(patternLik[px]);
570         }
571
572         for (int px=1; px < numUnique; px++) {
573                 for (int d1=0; d1 < primaryDims; d1++) {
574                         latentMean[d1] += latentMean[px * maxAbilities + d1];
575                         for (int d2=0; d2 <= d1; d2++) {
576                                 int cell = d2 * maxAbilities + d1;
577                                 int loc = px * maxAbilities * maxAbilities + cell;
578                                 latentCov[cell] += latentCov[loc];
579                         }
580                 }
581                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
582                         latentMean[sdim] += latentMean[px * maxAbilities + sdim];
583                         int cell = sdim * maxAbilities + sdim;
584                         int loc = px * maxAbilities * maxAbilities + cell;
585                         latentCov[cell] += latentCov[loc];
586                 }
587         }
588
589         //pda(latentMean, state->maxAbilities, 1);
590         //pda(latentCov, state->maxAbilities, state->maxAbilities);
591
592         omxData *data = state->data;
593         for (int d1=0; d1 < maxAbilities; d1++) {
594                 latentMean[d1] /= data->rows;
595         }
596
597         for (int d1=0; d1 < primaryDims; d1++) {
598                 for (int d2=0; d2 <= d1; d2++) {
599                         int cell = d2 * maxAbilities + d1;
600                         int tcell = d1 * maxAbilities + d2;
601                         latentCov[tcell] = latentCov[cell] =
602                                 latentCov[cell] / data->rows - latentMean[d1] * latentMean[d2];
603                 }
604         }
605         for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
606                 int cell = sdim * maxAbilities + sdim;
607                 latentCov[cell] = latentCov[cell] / data->rows - latentMean[sdim] * latentMean[sdim];
608         }
609
610         //Rprintf("E-step\n");
611         //pda(latentMean, state->maxAbilities, 1);
612         //pda(latentCov, state->maxAbilities, state->maxAbilities);
613 }
614
615 static void
616 schilling_bock_2005_rescale(omxExpectation *oo)
617 {
618         omxBA81State *state = (omxBA81State*) oo->argStruct;
619         omxMatrix *itemSpec = state->itemSpec;
620         omxMatrix *itemParam = state->itemParam;
621         omxMatrix *design = state->design;
622         double *latentMean = state->latentMean;
623         double *latentCov = state->latentCov;
624         double *latentMean1 = state->latentMean1;
625         double *latentCov1 = state->latentCov1;
626         int maxAbilities = state->maxAbilities;
627         int maxDims = state->maxDims;
628
629         //Rprintf("schilling bock\n");
630         //pda(latentMean, maxAbilities, 1);
631         //pda(latentCov, maxAbilities, maxAbilities);
632         //omxPrint(design, "design");
633
634         const char triangle = 'L';
635         F77_CALL(dpotrf)(&triangle, &maxAbilities, latentCov, &maxAbilities, &state->choleskyError);
636         if (state->choleskyError != 0) {
637                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
638                 return;
639         }
640
641         int numItems = itemParam->cols;
642         for (int ix=0; ix < numItems; ix++) {
643                 const double *spec = omxMatrixColumn(itemSpec, ix);
644                 int id = spec[RPF_ISpecID];
645                 const double *rawDesign = omxMatrixColumn(design, ix);
646                 int idesign[design->rows];
647                 int idx = 0;
648                 for (int dx=0; dx < design->rows; dx++) {
649                         if (isfinite(rawDesign[dx])) {
650                                 idesign[idx++] = rawDesign[dx]-1;
651                         } else {
652                                 idesign[idx++] = -1;
653                         }
654                 }
655                 for (int d1=0; d1 < idx; d1++) {
656                         if (idesign[d1] == -1) {
657                                 latentMean1[d1] = 0;
658                         } else {
659                                 latentMean1[d1] = latentMean[idesign[d1]];
660                         }
661                         for (int d2=0; d2 <= d1; d2++) {
662                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
663                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
664                                         latentCov1[d2 * maxDims + d1] = d1==d2? 1 : 0;
665                                 } else {
666                                         latentCov1[d2 * maxDims + d1] = latentCov[cell];
667                                 }
668                         }
669                 }
670                 if (1) {  // ease debugging, make optional TODO
671                         for (int d1=idx; d1 < maxDims; d1++) latentMean1[d1] = nan("");
672                         for (int d1=0; d1 < maxDims; d1++) {
673                                 for (int d2=0; d2 < maxDims; d2++) {
674                                         if (d1 < idx && d2 < idx) continue;
675                                         latentCov1[d2 * maxDims + d1] = nan("");
676                                 }
677                         }
678                 }
679                 double *iparam = omxMatrixColumn(itemParam, ix);
680                 int *mask = state->paramMap + state->derivPadSize * ix;
681                 rpf_model[id].rescale(spec, iparam, mask, latentMean1, latentCov1);
682         }
683
684         // This is an egregious hack :-)
685         omxState* currentState = oo->currentState;
686         int numFreeParams = currentState->numFreeParams;
687         double param[numFreeParams];
688         for (int rx=0; rx < itemParam->rows; rx++) {
689                 for (int cx=0; cx < itemParam->cols; cx++) {
690                         int vx = state->paramMap[cx * state->derivPadSize + rx];
691                         if (vx >= 0 && vx < numFreeParams) {
692                                 // Look for gradient entries to determine what to copy.
693                                 // I said it was a hack, no?
694                                 param[vx] = omxMatrixElement(itemParam, rx, cx);
695                         }
696                 }
697         }
698         handleFreeVarList(currentState, param, numFreeParams);
699 }
700
701 // Attempt G-H grid? http://dbarajassolano.wordpress.com/2012/01/26/on-sparse-grid-quadratures/
702 static void
703 ba81SetupQuadrature(omxExpectation* oo, int gridsize, int flat)
704 {
705         omxBA81State *state = (omxBA81State *) oo->argStruct;
706         int numUnique = state->numUnique;
707         int numThreads = getNumThreads(oo);
708         int maxDims = state->maxDims;
709         int Qwidth = state->Qwidth;
710         int numSpecific = state->numSpecific;
711         int priDims = maxDims - (numSpecific? 1 : 0);
712
713         // try starting small and increasing to the cap TODO
714         state->quadGridSize = gridsize;
715
716         state->totalQuadPoints = 1;
717         for (int dx=0; dx < maxDims; dx++) {
718                 state->totalQuadPoints *= state->quadGridSize;
719         }
720
721         state->totalPrimaryPoints = state->totalQuadPoints;
722
723         if (numSpecific) {
724                 state->totalPrimaryPoints /= state->quadGridSize;
725                 state->speLogQarea = Realloc(state->speLogQarea, state->quadGridSize * gridsize, double);
726         }
727
728         state->Qpoint = Realloc(state->Qpoint, state->quadGridSize, double);
729         state->priLogQarea = Realloc(state->priLogQarea, state->totalPrimaryPoints, double);
730
731         for (int px=0; px < state->quadGridSize; px ++) {
732                 state->Qpoint[px] = Qwidth - px * 2 * Qwidth / (state->quadGridSize-1);
733         }
734
735         if (flat) {
736                 double flatd = log(1) - log(state->totalPrimaryPoints);
737                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
738                         state->priLogQarea[qx] = flatd;
739                 }
740                 flatd = log(1) - log(state->quadGridSize);
741                 for (int sx=0; sx < numSpecific; sx++) {
742                         for (int qx=0; qx < state->quadGridSize; qx++) {
743                                 state->speLogQarea[ sx * state->quadGridSize + qx] = flatd;
744                         }
745                 }
746         } else {
747                 double totalArea = 0;
748                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
749                         int quad[priDims];
750                         decodeLocation(qx, priDims, state->quadGridSize, quad);
751                         double where[priDims];
752                         pointToWhere(state, quad, where, priDims);
753                         state->priLogQarea[qx] = dmvnorm(priDims, where, state->latentMeanOut, state->latentCovOut);
754                         totalArea += exp(state->priLogQarea[qx]);
755                 }
756                 totalArea = log(totalArea);
757                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
758                         state->priLogQarea[qx] -= totalArea;
759                         //Rprintf("%.5g,", state->priLogQarea[qx]);
760                 }
761                 //Rprintf("\n");
762
763                 for (int sx=0; sx < numSpecific; sx++) {
764                         totalArea = 0;
765                         for (int qx=0; qx < state->quadGridSize; qx++) {
766                                 int covCell = (priDims + sx) * state->maxAbilities + priDims + sx;
767                                 double den = dnorm(state->Qpoint[qx],
768                                                    state->latentMeanOut[priDims + sx],
769                                                    state->latentCovOut[covCell], TRUE);
770                                 state->speLogQarea[sx * state->quadGridSize + qx] = den;
771                                 totalArea += exp(den);
772                         }
773                         totalArea = log(totalArea);
774                         for (int qx=0; qx < state->quadGridSize; qx++) {
775                                 state->speLogQarea[sx * state->quadGridSize + qx] -= totalArea;
776                         }
777                 }
778         }
779
780         if (!state->cacheLXK) {
781                 state->lxk = Realloc(state->lxk, numUnique * numThreads, double);
782         } else {
783                 int ns = state->numSpecific;
784                 if (ns == 0) ns = 1;
785                 long numOrdinate = ns * state->totalQuadPoints;
786                 state->lxk = Realloc(state->lxk, numUnique * numOrdinate, double);
787         }
788
789         state->expected = Realloc(state->expected, state->totalOutcomes * state->totalQuadPoints, double);
790 }
791
792 OMXINLINE static void
793 updateLatentParam(omxExpectation* oo)
794 {
795         omxBA81State *state = (omxBA81State*) oo->argStruct;
796         int maxAbilities = state->maxAbilities;
797
798         for (int d1=0; d1 < maxAbilities; d1++) {
799                 if (state->freeMean[d1]) {
800                         state->latentMeanOut[d1] = state->latentMean[d1];
801                 }
802
803                 for (int d2=0; d2 <= d1; d2++) {
804                         int cell = d2 * maxAbilities + d1;
805                         if (state->freeCov[cell]) {
806                                 state->latentCovOut[cell] = state->latentCov[cell];
807                         }
808                 }
809         }
810         //Rprintf("updateLatentParam\n");
811         //pda(state->latentMeanOut, maxAbilities, 1);
812         //pda(state->latentCovOut, maxAbilities, maxAbilities);
813
814         ba81SetupQuadrature(oo, state->targetQpoints, 0);
815 }
816
817 OMXINLINE static void
818 expectedUpdate(omxData *restrict data, const int *rowMap, const int px, const int item,
819                const double observed, const int outcomes, double *out)
820 {
821         int pick = omxIntDataElementUnsafe(data, rowMap[px], item);
822         if (pick == NA_INTEGER) {
823                 double slice = exp(observed - log(outcomes));
824                 for (int ox=0; ox < outcomes; ox++) {
825                         out[ox] += slice;
826                 }
827         } else {
828                 out[pick-1] += exp(observed);
829         }
830 }
831
832 OMXINLINE static void
833 ba81Expected(omxExpectation* oo)
834 {
835         omxState* currentState = oo->currentState;  // only used in #pragma omp
836         omxBA81State *state = (omxBA81State*) oo->argStruct;
837         omxData *data = state->data;
838         int numSpecific = state->numSpecific;
839         const int *rowMap = state->rowMap;
840         double *patternLik = state->patternLik;
841         double *logNumIdentical = state->logNumIdentical;
842         int numUnique = state->numUnique;
843         int maxDims = state->maxDims;
844         int numItems = state->itemParam->cols;
845         omxMatrix *itemSpec = state->itemSpec;
846         int totalOutcomes = state->totalOutcomes;
847
848         OMXZERO(state->expected, totalOutcomes * state->totalQuadPoints);
849
850         if (numSpecific == 0) {
851 #pragma omp parallel for num_threads(currentState->numThreads)
852                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
853                         int quad[maxDims];
854                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
855                         double *lxk = ba81LikelihoodFast(oo, 0, quad);
856                         for (int px=0; px < numUnique; px++) {
857                                 double *out = state->expected + qx * totalOutcomes;
858                                 double observed = logNumIdentical[px] + lxk[px] - patternLik[px];
859                                 for (int ix=0; ix < numItems; ix++) {
860                                         const double *spec = omxMatrixColumn(itemSpec, ix);
861                                         int outcomes = spec[RPF_ISpecOutcomes];
862                                         expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
863                                         out += outcomes;
864                                 }
865                         }
866                 }
867         } else {
868                 int sDim = state->maxDims-1;
869                 long specificPoints = state->quadGridSize;
870
871 #pragma omp parallel for num_threads(currentState->numThreads)
872                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
873                         int quad[maxDims];
874                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
875
876                         // allSlxk, Slxk only depend on the ordinate of the primary dimensions
877                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
878                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
879                         cai2010(oo, !state->cacheLXK, quad, allSlxk, Slxk);
880
881                         for (long sx=0; sx < specificPoints; sx++) {
882                                 quad[sDim] = sx;
883                                 long qloc = encodeLocation(state->maxDims, state->quadGridSize, quad);
884
885                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
886                                         double *eis = Slxk + numUnique * sgroup;
887                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
888
889                                         for (int px=0; px < numUnique; px++) {
890                                                 double *out = state->expected + totalOutcomes * qloc;
891
892                                                 for (int ix=0; ix < numItems; ix++) {
893                                                         const double *spec = omxMatrixColumn(itemSpec, ix);
894                                                         int outcomes = spec[RPF_ISpecOutcomes];
895                                                         if (state->Sgroup[ix] == sgroup) {
896                                                                 double observed = logNumIdentical[px] + (allSlxk[px] - eis[px]) +
897                                                                         (lxk[px] - patternLik[px]);
898                                                                 expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
899                                                         }
900                                                         out += outcomes;
901                                                 }
902                                         }
903                                 }
904                         }
905                 }
906         }
907         //pda(state->expected, state->totalOutcomes, state->totalQuadPoints);
908 }
909
910 static void
911 ba81Estep(omxExpectation *oo, enum ComputeExpectationContext ctx) {
912         if (ctx == COMPUTE_EXPECT_INITIALIZE) return;
913
914         omxBA81State *state = (omxBA81State*) oo->argStruct;
915         if (state->userEitemParam) {
916                 state->userEitemParam = FALSE;
917         } else {
918                 omxCopyMatrix(state->EitemParam, state->itemParam);
919         }
920         ba81Estep1(oo);
921         if (ctx == COMPUTE_EXPECT_PREFIT) {
922                 if (state->rescale) schilling_bock_2005_rescale(oo);
923         } else {
924                 updateLatentParam(oo);
925         }
926         ba81Expected(oo);
927 }
928
929 OMXINLINE static double
930 ba81Fit1Ordinate(omxExpectation* oo, const int *quad, const double *weight, int want)
931 {
932         omxBA81State *state = (omxBA81State*) oo->argStruct;
933         omxMatrix *itemSpec = state->itemSpec;
934         omxMatrix *itemParam = state->itemParam;
935         int numItems = itemParam->cols;
936         int maxOutcomes = state->maxOutcomes;
937         int maxDims = state->maxDims;
938         double *myDeriv = state->thrDeriv + itemParam->cols * state->derivPadSize * omx_absolute_thread_num();
939         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
940
941         double where[maxDims];
942         pointToWhere(state, quad, where, maxDims);
943
944         double *outcomeProb = computeRPF(oo, itemParam, quad); // avoid malloc/free? TODO
945         if (!outcomeProb) return 0;
946
947         double thr_ll = 0;
948         for (int ix=0; ix < numItems; ix++) {
949                 const double *spec = omxMatrixColumn(itemSpec, ix);
950                 int id = spec[RPF_ISpecID];
951                 int iOutcomes = spec[RPF_ISpecOutcomes];
952
953                 double area = exp(logAreaProduct(state, quad, state->Sgroup[ix]));   // avoid exp() here? TODO
954                 for (int ox=0; ox < iOutcomes; ox++) {
955 #if 0
956 #pragma omp critical(ba81Fit1OrdinateDebug1)
957                         if (!isfinite(outcomeProb[ix * maxOutcomes + ox])) {
958                                 pda(itemParam->data, itemParam->rows, itemParam->cols);
959                                 pda(outcomeProb, outcomes, numItems);
960                                 error("RPF produced NAs");
961                         }
962 #endif
963                         double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
964                         thr_ll += got * area;
965                 }
966
967                 if (do_deriv) {
968                         double *iparam = omxMatrixColumn(itemParam, ix);
969                         double *pad = myDeriv + ix * state->derivPadSize;
970                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
971                 }
972                 weight += iOutcomes;
973         }
974
975         Free(outcomeProb);
976
977         return thr_ll;
978 }
979
980 static double
981 ba81ComputeMFit1(omxExpectation* oo, int want, double *gradient, double *hessian)
982 {
983         omxBA81State *state = (omxBA81State*) oo->argStruct;
984         omxState* currentState = oo->currentState;  // only used in #pragma omp
985         omxMatrix *customPrior = state->customPrior;
986         omxMatrix *itemParam = state->itemParam;
987         omxMatrix *itemSpec = state->itemSpec;
988         int maxDims = state->maxDims;
989         const int totalOutcomes = state->totalOutcomes;
990
991         double ll = 0;
992         if (customPrior) {
993                 omxRecompute(customPrior);
994                 ll = customPrior->data[0];
995         }
996
997         if (!isfinite(ll)) {
998                 omxPrint(itemParam, "item param");
999                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
1000         }
1001
1002 #pragma omp parallel for num_threads(currentState->numThreads)
1003         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1004                 //double area = exp(state->priLogQarea[qx]);  // avoid exp() here? TODO
1005                 int quad[maxDims];
1006                 decodeLocation(qx, maxDims, state->quadGridSize, quad);
1007                 double *weight = state->expected + qx * totalOutcomes;
1008                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want);
1009                 
1010 #pragma omp atomic
1011                 ll += thr_ll;
1012         }
1013
1014         if (!customPrior && gradient) {
1015                 double *deriv0 = state->thrDeriv;
1016
1017                 int perThread = itemParam->cols * state->derivPadSize;
1018                 for (int th=1; th < getNumThreads(oo); th++) {
1019                         double *thrD = state->thrDeriv + th * perThread;
1020                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
1021                 }
1022
1023                 int numItems = itemParam->cols;
1024                 for (int ix=0; ix < numItems; ix++) {
1025                         const double *spec = omxMatrixColumn(itemSpec, ix);
1026                         int id = spec[RPF_ISpecID];
1027                         double *iparam = omxMatrixColumn(itemParam, ix);
1028                         double *pad = deriv0 + ix * state->derivPadSize;
1029                         (*rpf_model[id].dLL2)(spec, iparam, pad);
1030                 }
1031
1032                 int numFreeParams = currentState->numFreeParams;
1033                 int numParams = itemParam->cols * state->derivPadSize;
1034                 for (int ox=0; ox < numParams; ox++) {
1035                         int to = state->paramMap[ox];
1036                         if (to == -1) continue;
1037
1038                         // Need to check because this can happen if
1039                         // lbounds/ubounds are not set appropriately.
1040                         if (0 && !isfinite(deriv0[ox])) {
1041                                 int item = ox / itemParam->rows;
1042                                 Rprintf("item parameters:\n");
1043                                 const double *spec = omxMatrixColumn(itemSpec, item);
1044                                 int id = spec[RPF_ISpecID];
1045                                 int numParam = (*rpf_model[id].numParam)(spec);
1046                                 double *iparam = omxMatrixColumn(itemParam, item);
1047                                 pda(iparam, numParam, 1);
1048                                 // Perhaps bounds can be pulled in from librpf? TODO
1049                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
1050                                       ox, item, deriv0[ox]);
1051                         }
1052
1053                         if (to < numFreeParams) {
1054                                 gradient[to] -= deriv0[ox];
1055                         } else {
1056                                 if (hessian) hessian[to - numFreeParams] -= deriv0[ox];
1057                         }
1058                 }
1059         }
1060
1061         return -ll;
1062 }
1063
1064 double
1065 ba81ComputeFit(omxExpectation* oo, int want, double *gradient, double *hessian)
1066 {
1067         if (!want) return 0;
1068
1069         omxBA81State *state = (omxBA81State*) oo->argStruct;
1070         omxState* currentState = oo->currentState;
1071
1072         if (want & FF_COMPUTE_FIT) {
1073                 ++state->fitCount;
1074         }
1075
1076         if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
1077                 // M-step
1078                 ++state->gradientCount;
1079
1080                 int numFreeParams = currentState->numFreeParams;
1081                 OMXZERO(gradient, numFreeParams);
1082                 if (hessian) OMXZERO(hessian, numFreeParams * numFreeParams);
1083
1084                 omxMatrix *itemParam = state->itemParam;
1085                 OMXZERO(state->thrDeriv, state->derivPadSize * itemParam->cols * getNumThreads(oo));
1086
1087                 double got = ba81ComputeMFit1(oo, want, gradient, hessian);
1088                 state->lastEMLL = got;
1089                 return got;
1090         } else {
1091                 // Major EM iteration, note completely different LL calculation
1092
1093                 double *patternLik = state->patternLik;
1094                 int *numIdentical = state->numIdentical;
1095                 int numUnique = state->numUnique;
1096                 double got = 0;
1097                 for (int ux=0; ux < numUnique; ux++) {
1098                         got += numIdentical[ux] * patternLik[ux];
1099                 }
1100                 return -2 * got;
1101         }
1102 }
1103
1104 /**
1105  * MAP is not affected by the number of items. EAP is. Likelihood can
1106  * get concentrated in a single quadrature ordinate. For 3PL, response
1107  * patterns can have a bimodal likelihood. This will confuse MAP and
1108  * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
1109  *
1110  * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
1111  * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
1112  * (pp 73-140). Lawrence Erlbaum Associates, Inc.
1113  */
1114 omxRListElement *
1115 ba81EAP(omxExpectation *oo, int *numReturns)
1116 {
1117         omxBA81State *state = (omxBA81State *) oo->argStruct;
1118         int maxDims = state->maxDims;
1119         int numSpecific = state->numSpecific;
1120         int priDims = maxDims - (numSpecific? 1 : 0);
1121         int maxAbilities = state->maxAbilities;
1122
1123         *numReturns = 4; // + (maxDims > 1) + (numSpecific > 1);
1124         omxRListElement *out = (omxRListElement*) R_alloc(*numReturns, sizeof(omxRListElement));
1125         int ox=0;
1126
1127         out[ox].numValues = -1;
1128         strcpy(out[ox].label, "latent.cov");
1129         out[ox].rows = maxAbilities;
1130         out[ox].cols = maxAbilities;
1131         out[ox].values = state->latentCovOut;
1132         ++ox;
1133
1134         out[ox].numValues = maxAbilities;
1135         strcpy(out[ox].label, "latent.mean");
1136         out[ox].values = state->latentMeanOut;
1137         ++ox;
1138
1139         out[ox].numValues = 1;
1140         strcpy(out[ox].label, "EM.LL");
1141         out[ox].values = &state->lastEMLL;
1142         ++ox;
1143
1144         omxData *data = state->data;
1145         int numUnique = state->numUnique;
1146
1147         // TODO Wainer & Thissen. (1987). Estimating ability with the wrong
1148         // model. Journal of Educational Statistics, 12, 339-368.
1149
1150         int numQpoints = state->targetQpoints * 2;  // make configurable TODO
1151
1152         if (numQpoints < 1 + 2.0 * sqrt(state->itemSpec->cols)) {
1153                 // Thissen & Orlando (2001, p. 136)
1154                 warning("EAP requires at least 2*sqrt(items) quadrature points");
1155         }
1156
1157         ba81SetupQuadrature(oo, numQpoints, 1);
1158         ba81Estep1(oo);   // recalc patternLik with a flat prior
1159
1160         /*
1161         double *cov = NULL;
1162         if (maxDims > 1) {
1163                 strcpy(out[2].label, "ability.cov");
1164                 out[2].numValues = -1;
1165                 out[2].rows = maxDims;
1166                 out[2].cols = maxDims;
1167                 out[2].values = (double*) R_alloc(out[2].rows * out[2].cols, sizeof(double));
1168                 cov = out[2].values;
1169                 OMXZERO(cov, out[2].rows * out[2].cols);
1170         }
1171         */
1172
1173         // Need a separate work space because the destination needs
1174         // to be in unsorted order with duplicated rows.
1175         double *ability = Calloc(numUnique * maxAbilities * 2, double);
1176
1177         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1178                 int quad[priDims];
1179                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1180                 double where[priDims];
1181                 pointToWhere(state, quad, where, priDims);
1182                 double logArea = state->priLogQarea[qx];
1183
1184                 double *lxk;
1185                 if (numSpecific == 0) {
1186                         lxk = ba81LikelihoodFast(oo, 0, quad);
1187                 } else {
1188                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
1189                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1190                         cai2010(oo, FALSE, quad, allSlxk, Slxk);
1191                         lxk = allSlxk;
1192                 }
1193
1194                 double *row = ability;
1195                 for (int px=0; px < numUnique; px++) {
1196                         double plik = exp(logArea + lxk[px]);
1197                         for (int dx=0; dx < priDims; dx++) {
1198                                 double piece = where[dx] * plik;
1199                                 row[dx*2] += piece;
1200                                 row[dx*2 + 1] += where[dx] * piece;
1201                                 // ignore cov, for now
1202                         }
1203                         row += 2 * maxAbilities;
1204                 }
1205         }
1206
1207         double *ris = Realloc(NULL, numUnique, double);
1208         for (int sx=0; sx < numSpecific; sx++) {
1209                 for (int sqx=0; sqx < state->quadGridSize; sqx++) {
1210                         double area = exp(state->speLogQarea[sx * state->quadGridSize + sqx]);
1211                         double ptArea = area * state->Qpoint[sqx];
1212                         OMXZERO(ris, numUnique);
1213                         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1214                                 int quad[maxDims];
1215                                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1216                                 quad[priDims] = sqx;
1217
1218                                 double *allSlxk = CALC_ALLSLXK(state, numUnique);
1219                                 double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1220                                 cai2010(oo, FALSE, quad, allSlxk, Slxk);
1221
1222                                 double *eis = Slxk + numUnique * sx;
1223                                 double *lxk = ba81LikelihoodFast(oo, sx, quad);
1224
1225                                 double logArea = state->priLogQarea[qx];
1226                                 for (int px=0; px < numUnique; px++) {
1227                                         ris[px] += exp(logArea + lxk[px] + allSlxk[px] - eis[px]);
1228                                 }
1229                         }
1230                         double *row = ability;
1231                         for (int px=0; px < numUnique; px++) {
1232                                 double piece = ris[px] * ptArea;
1233                                 row[(priDims + sx) * 2] += piece;
1234                                 row[(priDims + sx) * 2 + 1] += piece * state->Qpoint[sqx];
1235                                 row += 2 * maxAbilities;
1236                         }
1237                 }
1238         }
1239         Free(ris);
1240
1241         double *patternLik = state->patternLik;
1242         double *row = ability;
1243         for (int px=0; px < numUnique; px++) {
1244                 double denom = exp(patternLik[px]);
1245                 for (int ax=0; ax < maxAbilities; ax++) {
1246                         row[ax * 2] /= denom;
1247                         row[ax * 2 + 1] /= denom;
1248                         row[ax * 2 + 1] -= row[ax * 2] * row[ax * 2];
1249                 }
1250                 row += 2 * maxAbilities;
1251         }
1252
1253         /*
1254         // make symmetric
1255         for (int d1=0; d1 < maxDims; d1++) {
1256                 for (int d2=0; d2 < d1; d2++) {
1257                         cov[d2 * maxDims + d1] = cov[d1 * maxDims + d2];
1258                 }
1259         }
1260         */
1261
1262         for (int px=0; px < numUnique; px++) {
1263                 double *arow = ability + px * 2 * maxAbilities;
1264                 for (int dx=0; dx < maxAbilities; dx++) {
1265                         arow[dx*2+1] = sqrt(arow[dx*2+1]);
1266                 }
1267         }
1268
1269         strcpy(out[ox].label, "ability");
1270         out[ox].numValues = -1;
1271         out[ox].rows = 2 * maxAbilities;
1272         out[ox].cols = data->rows;
1273         out[ox].values = (double*) R_alloc(out[ox].rows * out[ox].cols, sizeof(double));
1274
1275         for (int rx=0; rx < numUnique; rx++) {
1276                 double *pa = ability + rx * 2 * maxAbilities;
1277
1278                 int dups = omxDataNumIdenticalRows(state->data, state->rowMap[rx]);
1279                 for (int dup=0; dup < dups; dup++) {
1280                         int dest = omxDataIndex(data, state->rowMap[rx]+dup);
1281                         memcpy(out[ox].values + dest * out[ox].rows, pa, sizeof(double) * 2 * maxAbilities);
1282                 }
1283         }
1284         Free(ability);
1285         ++ox;
1286
1287         for (int ix=0; ix < state->itemParam->cols; ix++) {
1288                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1289                 int id = spec[RPF_ISpecID];
1290                 double *param = omxMatrixColumn(state->itemParam, ix);
1291                 rpf_model[id].postfit(spec, param);
1292         }
1293
1294         return out;
1295 }
1296
1297 static void ba81Destroy(omxExpectation *oo) {
1298         if(OMX_DEBUG) {
1299                 Rprintf("Freeing %s function.\n", NAME);
1300         }
1301         omxBA81State *state = (omxBA81State *) oo->argStruct;
1302         //Rprintf("fit %d gradient %d\n", state->fitCount, state->gradientCount);
1303         omxFreeAllMatrixData(state->itemSpec);
1304         omxFreeAllMatrixData(state->itemParam);
1305         omxFreeAllMatrixData(state->EitemParam);
1306         omxFreeAllMatrixData(state->design);
1307         omxFreeAllMatrixData(state->customPrior);
1308         Free(state->logNumIdentical);
1309         Free(state->numIdentical);
1310         Free(state->Qpoint);
1311         Free(state->priLogQarea);
1312         Free(state->rowMap);
1313         Free(state->patternLik);
1314         Free(state->lxk);
1315         Free(state->Slxk);
1316         Free(state->allSlxk);
1317         Free(state->Sgroup);
1318         Free(state->expected);
1319         Free(state->paramMap);
1320         Free(state->thrDeriv);
1321         Free(state->latentMean);
1322         Free(state->latentCov);
1323         Free(state->latentMean1);
1324         Free(state->latentCov1);
1325         Free(state->latentMeanOut);
1326         Free(state->latentCovOut);
1327         Free(state);
1328 }
1329
1330 void getMatrixDims(SEXP r_theta, int *rows, int *cols)
1331 {
1332     SEXP matrixDims;
1333     PROTECT(matrixDims = getAttrib(r_theta, R_DimSymbol));
1334     int *dimList = INTEGER(matrixDims);
1335     *rows = dimList[0];
1336     *cols = dimList[1];
1337     UNPROTECT(1);
1338 }
1339
1340 void omxInitExpectationBA81(omxExpectation* oo) {
1341         omxState* currentState = oo->currentState;      
1342         SEXP rObj = oo->rObj;
1343         SEXP tmp;
1344         
1345         if(OMX_DEBUG) {
1346                 Rprintf("Initializing %s.\n", NAME);
1347         }
1348         if (!rpf_model) {
1349                 const int wantVersion = 3;
1350                 int version;
1351                 get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
1352                 (*get_librpf)(&version, &rpf_numModels, &rpf_model);
1353                 if (version < wantVersion) error("librpf binary API %d installed, at least %d is required",
1354                                                  version, wantVersion);
1355         }
1356         
1357         omxBA81State *state = Calloc(1, omxBA81State);
1358         oo->argStruct = (void*) state;
1359
1360         PROTECT(tmp = GET_SLOT(rObj, install("data")));
1361         state->data = omxDataLookupFromState(tmp, currentState);
1362
1363         if (strcmp(omxDataType(state->data), "raw") != 0) {
1364                 omxRaiseErrorf(currentState, "%s unable to handle data type %s", NAME, omxDataType(state->data));
1365                 return;
1366         }
1367
1368         state->itemSpec =
1369                 omxNewMatrixFromSlot(rObj, currentState, "ItemSpec");
1370         state->design =
1371                 omxNewMatrixFromSlot(rObj, currentState, "Design");
1372         state->itemParam =
1373                 omxNewMatrixFromSlot(rObj, currentState, "ItemParam");
1374         state->EitemParam =
1375                 omxNewMatrixFromSlot(rObj, currentState, "EItemParam");
1376         if (state->EitemParam) {
1377                 state->userEitemParam = TRUE;
1378         } else {
1379                 state->EitemParam =
1380                         omxInitTemporaryMatrix(NULL, state->itemParam->rows, state->itemParam->cols,
1381                                                TRUE, currentState);
1382         }
1383
1384         state->customPrior =
1385                 omxNewMatrixFromSlot(rObj, currentState, "CustomPrior");
1386         
1387         oo->computeFun = ba81Estep;
1388         oo->destructFun = ba81Destroy;
1389         
1390         // TODO: Exactly identical rows do not contribute any information.
1391         // The sorting algorithm ought to remove them so we don't waste RAM.
1392         // The following summary stats would be cheaper to calculate too.
1393
1394         int numUnique = 0;
1395         omxData *data = state->data;
1396         if (omxDataNumFactor(data) != data->cols) {
1397                 // verify they are ordered factors TODO
1398                 omxRaiseErrorf(currentState, "%s: all columns must be factors", NAME);
1399                 return;
1400         }
1401
1402         for (int rx=0; rx < data->rows;) {
1403                 rx += omxDataNumIdenticalRows(state->data, rx);
1404                 ++numUnique;
1405         }
1406         state->numUnique = numUnique;
1407
1408         state->rowMap = Realloc(NULL, numUnique, int);
1409         state->numIdentical = Realloc(NULL, numUnique, int);
1410         state->logNumIdentical = Realloc(NULL, numUnique, double);
1411
1412         int numItems = state->itemParam->cols;
1413         if (data->cols != numItems) {
1414                 error("Data has %d columns for %d items", data->cols, numItems);
1415         }
1416
1417         for (int rx=0, ux=0; rx < data->rows; ux++) {
1418                 if (rx == 0) {
1419                         // all NA rows will sort to the top
1420                         int na=0;
1421                         for (int ix=0; ix < numItems; ix++) {
1422                                 if (omxIntDataElement(data, 0, ix) == NA_INTEGER) { ++na; }
1423                         }
1424                         if (na == numItems) {
1425                                 omxRaiseErrorf(currentState, "Remove rows with all NAs");
1426                                 return;
1427                         }
1428                 }
1429                 int dups = omxDataNumIdenticalRows(state->data, rx);
1430                 state->numIdentical[ux] = dups;
1431                 state->logNumIdentical[ux] = log(dups);
1432                 state->rowMap[ux] = rx;
1433                 rx += dups;
1434         }
1435
1436         state->patternLik = Realloc(NULL, numUnique, double);
1437
1438         int numThreads = getNumThreads(oo);
1439
1440         int maxSpec = 0;
1441         int maxParam = 0;
1442         state->maxDims = 0;
1443         state->maxOutcomes = 0;
1444
1445         for (int ix=0; ix < numItems; ix++) {
1446                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1447                 int id = spec[RPF_ISpecID];
1448                 if (id < 0 || id >= rpf_numModels) {
1449                         omxRaiseErrorf(currentState, "ItemSpec column %d has unknown item model %d", ix, id);
1450                         return;
1451                 }
1452
1453                 double *param = omxMatrixColumn(state->itemParam, ix);
1454                 rpf_model[id].prefit(spec, param);
1455         }
1456
1457         int totalOutcomes = 0;
1458         for (int cx = 0; cx < data->cols; cx++) {
1459                 const double *spec = omxMatrixColumn(state->itemSpec, cx);
1460                 int id = spec[RPF_ISpecID];
1461                 int dims = spec[RPF_ISpecDims];
1462                 if (state->maxDims < dims)
1463                         state->maxDims = dims;
1464
1465                 int no = spec[RPF_ISpecOutcomes];
1466                 totalOutcomes += no;
1467                 if (state->maxOutcomes < no)
1468                         state->maxOutcomes = no;
1469
1470                 // TODO this summary stat should be available from omxData
1471                 int dataMax=0;
1472                 for (int rx=0; rx < data->rows; rx++) {
1473                         int pick = omxIntDataElementUnsafe(data, rx, cx);
1474                         if (dataMax < pick)
1475                                 dataMax = pick;
1476                 }
1477                 if (dataMax > no) {
1478                         error("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
1479                 } else if (dataMax < no) {
1480                         warning("Data for item %d has only %d outcomes, not %d", cx+1, dataMax, no);
1481                         // promote to error?
1482                         // should complain if an outcome is not represented in the data TODO
1483                 }
1484
1485                 int numSpec = (*rpf_model[id].numSpec)(spec);
1486                 if (maxSpec < numSpec)
1487                         maxSpec = numSpec;
1488
1489                 int numParam = (*rpf_model[id].numParam)(spec);
1490                 if (maxParam < numParam)
1491                         maxParam = numParam;
1492         }
1493
1494         state->totalOutcomes = totalOutcomes;
1495
1496         if (state->itemSpec->cols != data->cols || state->itemSpec->rows != maxSpec) {
1497                 omxRaiseErrorf(currentState, "ItemSpec must have %d item columns and %d rows",
1498                                data->cols, maxSpec);
1499                 return;
1500         }
1501         if (state->itemParam->rows != maxParam) {
1502                 omxRaiseErrorf(currentState, "ItemParam should have %d rows", maxParam);
1503                 return;
1504         }
1505
1506         state->derivPadSize = maxParam + maxParam*(1+maxParam)/2;
1507
1508         if (state->design == NULL) {
1509                 state->maxAbilities = state->maxDims;
1510                 state->design = omxInitTemporaryMatrix(NULL, state->maxDims, numItems,
1511                                        TRUE, currentState);
1512                 for (int ix=0; ix < numItems; ix++) {
1513                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1514                         int dims = spec[RPF_ISpecDims];
1515                         for (int dx=0; dx < state->maxDims; dx++) {
1516                                 omxSetMatrixElement(state->design, dx, ix, dx < dims? (double)dx+1 : nan(""));
1517                         }
1518                 }
1519         } else {
1520                 omxMatrix *design = state->design;
1521                 if (design->cols != numItems ||
1522                     design->rows != state->maxDims) {
1523                         omxRaiseErrorf(currentState, "Design matrix should have %d rows and %d columns",
1524                                        state->maxDims, numItems);
1525                         return;
1526                 }
1527
1528                 state->maxAbilities = 0;
1529                 for (int ix=0; ix < design->rows * design->cols; ix++) {
1530                         double got = design->data[ix];
1531                         if (!R_FINITE(got)) continue;
1532                         if (round(got) != (int)got) error("Design matrix can only contain integers"); // TODO better way?
1533                         if (state->maxAbilities < got)
1534                                 state->maxAbilities = got;
1535                 }
1536                 for (int ix=0; ix < design->cols; ix++) {
1537                         const double *idesign = omxMatrixColumn(design, ix);
1538                         int ddim = 0;
1539                         for (int rx=0; rx < design->rows; rx++) {
1540                                 if (isfinite(idesign[rx])) ddim += 1;
1541                         }
1542                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1543                         int dims = spec[RPF_ISpecDims];
1544                         if (ddim > dims) error("Item %d has %d dims but design assigns %d", ix, dims, ddim);
1545                 }
1546         }
1547         if (state->maxAbilities <= state->maxDims) {
1548                 state->Sgroup = Calloc(numItems, int);
1549         } else {
1550                 // Not sure if this is correct, revisit TODO
1551                 int Sgroup0 = -1;
1552                 state->Sgroup = Realloc(NULL, numItems, int);
1553                 for (int dx=0; dx < state->maxDims; dx++) {
1554                         for (int ix=0; ix < numItems; ix++) {
1555                                 int ability = omxMatrixElement(state->design, dx, ix);
1556                                 if (dx < state->maxDims - 1) {
1557                                         if (Sgroup0 <= ability)
1558                                                 Sgroup0 = ability+1;
1559                                         continue;
1560                                 }
1561                                 int ss=-1;
1562                                 if (ability >= Sgroup0) {
1563                                         if (ss == -1) {
1564                                                 ss = ability;
1565                                         } else {
1566                                                 omxRaiseErrorf(currentState, "Item %d cannot belong to more than "
1567                                                                "1 specific dimension (both %d and %d)",
1568                                                                ix, ss, ability);
1569                                                 return;
1570                                         }
1571                                 }
1572                                 if (ss == -1) ss = Sgroup0;
1573                                 state->Sgroup[ix] = ss - Sgroup0;
1574                         }
1575                 }
1576                 state->numSpecific = state->maxAbilities - state->maxDims + 1;
1577                 state->allSlxk = Realloc(NULL, numUnique * numThreads, double);
1578                 state->Slxk = Realloc(NULL, numUnique * state->numSpecific * numThreads, double);
1579         }
1580
1581         PROTECT(tmp = GET_SLOT(rObj, install("free.mean")));
1582         state->freeMean = Realloc(NULL, state->maxAbilities, int);
1583         if (isNull(tmp)) {
1584                 OMXZERO(state->freeMean, state->maxAbilities);
1585         } else {
1586                 if (length(tmp) != state->maxAbilities) {
1587                         error("Specify for %d means whether to freely estimate", state->maxAbilities);
1588                 }
1589                 memcpy(state->freeMean, LOGICAL(tmp), sizeof(int)*state->maxAbilities);
1590         }
1591
1592         PROTECT(tmp = GET_SLOT(rObj, install("free.cov")));
1593         state->freeCov = Realloc(NULL, state->maxAbilities * state->maxAbilities, int);
1594         if (isNull(tmp)) {
1595                 OMXZERO(state->freeCov, state->maxAbilities * state->maxAbilities);
1596         } else {
1597                 int rows;
1598                 int cols;
1599                 getMatrixDims(tmp, &rows, &cols);
1600                 if (rows != state->maxAbilities && cols != state->maxAbilities) {
1601                         error("free.cov must be a %d by %d matrix of logicals",
1602                               state->maxAbilities, state->maxAbilities);
1603                 }
1604                 memcpy(state->freeCov, LOGICAL(tmp),
1605                        sizeof(int)*state->maxAbilities * state->maxAbilities);
1606         }
1607
1608         PROTECT(tmp = GET_SLOT(rObj, install("cache")));
1609         state->cacheLXK = asLogical(tmp);
1610
1611         PROTECT(tmp = GET_SLOT(rObj, install("rescale")));
1612         state->rescale = asLogical(tmp);
1613
1614         PROTECT(tmp = GET_SLOT(rObj, install("qpoints")));
1615         state->targetQpoints = asReal(tmp);
1616
1617         PROTECT(tmp = GET_SLOT(rObj, install("qwidth")));
1618         state->Qwidth = asReal(tmp);
1619
1620         state->latentMean = Realloc(NULL, state->maxAbilities * numUnique, double);
1621         state->latentCov = Realloc(NULL, state->maxAbilities * state->maxAbilities * numUnique, double);
1622         state->latentMean1 = Realloc(NULL, state->maxDims, double);
1623         state->latentCov1 = Realloc(NULL, state->maxDims * state->maxDims, double);
1624         state->latentMeanOut = Calloc(state->maxAbilities, double);
1625         state->latentCovOut = Realloc(NULL, state->maxAbilities * state->maxAbilities, double);
1626
1627         for (int d1=0; d1 < state->maxAbilities; d1++) {
1628                 for (int d2=0; d2 < state->maxAbilities; d2++) {
1629                         state->latentCovOut[d1 * state->maxAbilities + d2] = d1==d2? 1 : 0;
1630                 }
1631         }
1632
1633         ba81SetupQuadrature(oo, state->targetQpoints, 0);
1634
1635         buildParamMap(oo);
1636
1637         // verify data bounded between 1 and numOutcomes TODO
1638         // hm, looks like something could be added to omxData for column summary stats?
1639 }