Rename
[openmx:openmx.git] / src / omxExpectationBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 // Consider replacing log() with log2() in some places? Not worth it?
19
20 #include <Rmath.h>
21 #include "omxExpectation.h"
22 #include "omxOpenmpWrap.h"
23 #include "npsolWrap.h"
24 #include "libirt-rpf.h"
25 #include "omxOptimizer.h"  // remove TODO
26 #include "dmvnorm.h"
27
28 static const char *NAME = "ExpectationBA81";
29
30 static const struct rpf *rpf_model = NULL;
31 static int rpf_numModels;
32 static const double MIN_PATTERNLIK = 1e-100;
33
34 enum score_option {
35         SCORES_OMIT,
36         SCORES_UNIQUE,
37         SCORES_FULL
38 };
39
40 typedef struct {
41
42         // data characteristics
43         omxData *data;
44         int numUnique;
45         int *numIdentical;        // length numUnique
46         double *logNumIdentical;  // length numUnique
47         int *rowMap;              // length numUnique
48
49         // item description related
50         omxMatrix *itemSpec;
51         int maxOutcomes;
52         int maxDims;
53         int maxAbilities;
54         int numSpecific;
55         int *Sgroup;              // item's specific group 0..numSpecific-1
56         omxMatrix *design;        // items * maxDims
57
58         // quadrature related
59         double Qwidth;
60         double targetQpoints;
61         long quadGridSize;
62         long totalQuadPoints;     // quadGridSize ^ maxDims
63         long totalPrimaryPoints;  // totalQuadPoints except for specific dim TODO
64         double *Qpoint;           // quadGridSize
65         double *priLogQarea;      // totalPrimaryPoints
66         double *speLogQarea;      // quadGridSize * numSpecific
67
68         // estimation related
69         omxMatrix *EitemParam;    // E step version
70         int userEitemParam;
71         int rescale;
72         omxMatrix *itemParam;     // M step version
73         omxMatrix *customPrior;   // TODO remove?
74         int derivPadSize;         // maxParam + maxParam*(1+maxParam)/2
75         double *thrDeriv;         // itemParam->cols * derivPadSize * thread
76         int *paramMap;            // itemParam->cols * derivPadSize -> index of free parameter
77         int cacheLXK;
78         double *lxk;              // wo/cache, numUnique * thread
79         double *allSlxk;          // numUnique * thread
80         double *Slxk;             // numUnique * #specific dimensions * thread
81         double *patternLik;       // numUnique
82         int totalOutcomes;
83         double *expected;         // totalOutcomes * totalQuadPoints
84         double *ElatentMean;      // maxAbilities * numUnique
85         double *ElatentCov;       // maxAbilities * maxAbilities * numUnique ; only lower triangle is used
86         double *tmpLatentMean;    // maxDims
87         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
88         omxMatrix *latentMeanOut;
89         omxMatrix *latentCovOut;
90         int choleskyError;
91
92         enum score_option scores;
93         int gradientCount;
94         int fitCount;
95         double lastEMLL;
96 } omxBA81State;
97
98 static void
99 pda(const double *ar, int rows, int cols) {   // column major order
100         for (int rx=0; rx < rows; rx++) {
101                 for (int cx=0; cx < cols; cx++) {
102                         Rprintf("%.6g, ", ar[cx * rows + rx]);
103                 }
104                 Rprintf("\n");
105         }
106 }
107
108 static void
109 pia(const int *ar, int rows, int cols) {   // column major order
110         for (int rx=0; rx < rows; rx++) {
111                 for (int cx=0; cx < cols; cx++) {
112                         Rprintf("%d, ", ar[cx * rows + rx]);
113                 }
114                 Rprintf("\n");
115         }
116 }
117
118 static int
119 getNumThreads(omxExpectation* oo)
120 {
121         int numThreads = oo->currentState->numThreads;
122         if (numThreads < 1) numThreads = 1;
123         return numThreads;
124 }
125
126 static void buildParamMap(omxExpectation* oo)
127 {
128         omxState* currentState = oo->currentState;
129         omxBA81State *state = (omxBA81State *) oo->argStruct;
130         omxMatrix *itemParam = state->itemParam;
131         int size = itemParam->cols * state->derivPadSize;
132
133         state->paramMap = Realloc(NULL, size, int);
134         for (int px=0; px < size; px++) {
135                 state->paramMap[px] = -1;
136         }
137
138         int numFreeParams = currentState->numFreeParams;
139         int *pRow = Realloc(NULL, numFreeParams, int);
140         int *pCol = Realloc(NULL, numFreeParams, int);
141
142         for (int px=0; px < numFreeParams; px++) {
143                 pRow[px] = -1;
144                 pCol[px] = -1;
145                 omxFreeVar *fv = currentState->freeVarList + px;
146                 for (int lx=0; lx < fv->numLocations; lx++) {
147                         if (~fv->matrices[lx] == itemParam->matrixNumber) {
148                                 pRow[px] = fv->row[lx];
149                                 pCol[px] = fv->col[lx];
150                                 int at = pCol[px] * state->derivPadSize + pRow[px];
151                                 state->paramMap[at] = px;
152                         }
153                 }
154         }
155
156         for (int p1=0; p1 < numFreeParams; p1++) {
157                 for (int p2=p1; p2 < numFreeParams; p2++) {
158                         if (pCol[p1] == -1 || pCol[p1] != pCol[p2]) continue;
159                         const double *spec = omxMatrixColumn(state->itemSpec, pCol[p1]);
160                         int id = spec[RPF_ISpecID];
161                         int numParam = (*rpf_model[id].numParam)(spec);
162                         int r1 = pRow[p1];
163                         int r2 = pRow[p2];
164                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
165                         int rowOffset = 0;
166                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
167                         int at = pCol[p1] * state->derivPadSize + numParam + rowOffset + r1;
168                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
169                 }
170         }
171
172         Free(pRow);
173         Free(pCol);
174
175         state->thrDeriv = Realloc(NULL, itemParam->cols * state->derivPadSize * getNumThreads(oo), double);
176 }
177
178 OMXINLINE static void
179 pointToWhere(omxBA81State *state, const int *quad, double *where, int upto)
180 {
181         for (int dx=0; dx < upto; dx++) {
182                 where[dx] = state->Qpoint[quad[dx]];
183         }
184 }
185
186 OMXINLINE static void
187 assignDims(omxMatrix *itemSpec, omxMatrix *design, int dims, int maxDims, int ix,
188            const double *restrict theta, double *restrict ptheta)
189 {
190         for (int dx=0; dx < dims; dx++) {
191                 int ability = (int)omxMatrixElement(design, dx, ix) - 1;
192                 if (ability >= maxDims) ability = maxDims-1;
193                 ptheta[dx] = theta[ability];
194         }
195 }
196
197 /**
198  * \param theta Vector of ability parameters, one per ability
199  * \returns A numItems by maxOutcomes colMajor vector of doubles. Caller must Free it.
200  */
201 static double *
202 computeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
203 {
204         omxBA81State *state = (omxBA81State*) oo->argStruct;
205         omxMatrix *itemSpec = state->itemSpec;
206         int numItems = itemSpec->cols;
207         omxMatrix *design = state->design;
208         int maxDims = state->maxDims;
209
210         double theta[maxDims];
211         pointToWhere(state, quad, theta, maxDims);
212
213         double *outcomeProb = Realloc(NULL, numItems * state->maxOutcomes, double);
214         //double *outcomeProb = Calloc(numItems * state->maxOutcomes, double);
215
216         for (int ix=0; ix < numItems; ix++) {
217                 const double *spec = omxMatrixColumn(itemSpec, ix);
218                 double *iparam = omxMatrixColumn(itemParam, ix);
219                 double *out = outcomeProb + ix * state->maxOutcomes;
220                 int id = spec[RPF_ISpecID];
221                 int dims = spec[RPF_ISpecDims];
222                 double ptheta[dims];
223                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta);
224                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
225 #if 0
226                 for (int ox=0; ox < spec[RPF_ISpecOutcomes]; ox++) {
227                         if (!isfinite(out[ox]) || out[ox] > 0) {
228                                 Rprintf("spec\n");
229                                 pda(spec, itemSpec->rows, 1);
230                                 Rprintf("item param\n");
231                                 pda(iparam, itemParam->rows, 1);
232                                 Rprintf("where\n");
233                                 pda(ptheta, dims, 1);
234                                 error("RPF returned %20.20f", out[ox]);
235                         }
236                 }
237 #endif
238         }
239
240         return outcomeProb;
241 }
242
243 OMXINLINE static long
244 encodeLocation(const int dims, const long grid, const int *restrict quad)
245 {
246         long qx = 0;
247         for (int dx=dims-1; dx >= 0; dx--) {
248                 qx = qx * grid;
249                 qx += quad[dx];
250         }
251         return qx;
252 }
253
254 OMXINLINE static double *
255 getLXKcache(omxBA81State *state, const int *quad, const int specific)
256 {
257         long ordinate;
258         if (state->numSpecific == 0) {
259                 ordinate = encodeLocation(state->maxDims, state->quadGridSize, quad);
260         } else {
261                 ordinate = (specific * state->totalQuadPoints +
262                             encodeLocation(state->maxDims, state->quadGridSize, quad));
263         }
264         return state->lxk + state->numUnique * ordinate;
265 }
266
267 OMXINLINE static double *
268 ba81Likelihood(omxExpectation *oo, int specific, const int *restrict quad)
269 {
270         omxBA81State *state = (omxBA81State*) oo->argStruct;
271         int numUnique = state->numUnique;
272         int maxOutcomes = state->maxOutcomes;
273         omxData *data = state->data;
274         int numItems = state->itemSpec->cols;
275         int *restrict Sgroup = state->Sgroup;
276         double *restrict lxk;
277
278         if (!state->cacheLXK) {
279                 lxk = state->lxk + numUnique * omx_absolute_thread_num();
280         } else {
281                 lxk = getLXKcache(state, quad, specific);
282         }
283
284         const double *outcomeProb = computeRPF(oo, state->EitemParam, quad);
285         if (!outcomeProb) {
286                 OMXZERO(lxk, numUnique);
287                 return lxk;
288         }
289
290         const int *rowMap = state->rowMap;
291         for (int px=0; px < numUnique; px++) {
292                 double lxk1 = 0;
293                 for (int ix=0; ix < numItems; ix++) {
294                         if (specific != Sgroup[ix]) continue;
295                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
296                         if (pick == NA_INTEGER) continue;
297                         double piece = outcomeProb[ix * maxOutcomes + pick-1];  // move -1 elsewhere TODO
298                         lxk1 += piece;
299                 }
300 #if 0
301 #pragma omp critical(ba81LikelihoodDebug1)
302                 if (!isfinite(lxk1) || lxk1 > numItems) {
303                         Rprintf("where\n");
304                         double where[state->maxDims];
305                         pointToWhere(state, quad, where, state->maxDims);
306                         pda(where, state->maxDims, 1);
307                         Rprintf("prob\n");
308                         pda(outcomeProb, numItems, maxOutcomes);
309                         error("Likelihood of row %d is %f", rowMap[px], lxk1);
310                 }
311 #endif
312                 lxk[px] = lxk1;
313         }
314
315         Free(outcomeProb);
316
317         return lxk;
318 }
319
320 OMXINLINE static double *
321 ba81LikelihoodFast(omxExpectation *oo, int specific, const int *restrict quad)
322 {
323         omxBA81State *state = (omxBA81State*) oo->argStruct;
324         if (!state->cacheLXK) {
325                 return ba81Likelihood(oo, specific, quad);
326         } else {
327                 return getLXKcache(state, quad, specific);
328         }
329
330 }
331
332 OMXINLINE static void
333 mapLatentSpace(omxBA81State *state, int px, int sgroup, double piece, const double *where)
334 {
335         double *ElatentMean = state->ElatentMean;
336         double *ElatentCov = state->ElatentCov;
337         int maxDims = state->maxDims;
338         int maxAbilities = state->maxAbilities;
339         int pmax = maxDims;
340         if (state->numSpecific) pmax -= 1;
341
342         if (sgroup == 0) {
343                 for (int d1=0; d1 < pmax; d1++) {
344                         double piece_w1 = piece * where[d1];
345                         int mloc = px * maxAbilities + d1;
346 #pragma omp atomic
347                         ElatentMean[mloc] += piece_w1;
348                         for (int d2=0; d2 <= d1; d2++) {
349                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
350                                 double piece_cov = piece_w1 * where[d2];
351 #pragma omp atomic
352                                 ElatentCov[loc] += piece_cov;
353                         }
354                 }
355         }
356
357         if (state->numSpecific) {
358                 int sdim = maxDims + sgroup - 1;
359
360                 double piece_w1 = piece * where[maxDims-1];
361                 int mloc = px * maxAbilities + sdim;
362 #pragma omp atomic
363                 ElatentMean[mloc] += piece_w1;
364
365                 int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
366                 double piece_var = piece_w1 * where[maxDims-1];
367 #pragma omp atomic
368                 ElatentCov[loc] += piece_var;
369         }
370 }
371
372 OMXINLINE static double
373 logAreaProduct(omxBA81State *state, const int *restrict quad, const int sg)
374 {
375         int maxDims = state->maxDims;
376         if (state->numSpecific == 0) {
377                 long qloc = encodeLocation(maxDims, state->quadGridSize, quad);
378                 return state->priLogQarea[qloc];
379         } else {
380                 long priloc = encodeLocation(maxDims-1, state->quadGridSize, quad);
381                 return (state->priLogQarea[priloc] +
382                         state->speLogQarea[sg * state->quadGridSize + quad[maxDims - 1]]);
383         }
384 }
385
386 #define CALC_ALLSLXK(state, numUnique) \
387         (state->allSlxk + omx_absolute_thread_num() * (numUnique))
388
389 #define CALC_SLXK(state, numUnique, numSpecific) \
390         (state->Slxk + omx_absolute_thread_num() * (numUnique) * (numSpecific))
391
392 OMXINLINE static void
393 cai2010(omxExpectation* oo, int recompute, const int *restrict primaryQuad,
394         double *restrict allSlxk, double *restrict Slxk)
395 {
396         omxBA81State *state = (omxBA81State*) oo->argStruct;
397         int numUnique = state->numUnique;
398         int numSpecific = state->numSpecific;
399         int maxDims = state->maxDims;
400         int sDim = maxDims-1;
401
402         int quad[maxDims];
403         memcpy(quad, primaryQuad, sizeof(int)*sDim);
404
405         OMXZERO(Slxk, numUnique * numSpecific);
406         OMXZERO(allSlxk, numUnique);
407
408         for (int sx=0; sx < numSpecific; sx++) {
409                 double *eis = Slxk + numUnique * sx;
410                 int quadGridSize = state->quadGridSize;
411
412                 for (int qx=0; qx < quadGridSize; qx++) {
413                         quad[sDim] = qx;
414                         double where[maxDims];
415                         pointToWhere(state, quad, where, maxDims);
416
417                         double *lxk;
418                         if (recompute) {
419                                 lxk = ba81Likelihood(oo, sx, quad);
420                         } else {
421                                 lxk = getLXKcache(state, quad, sx);
422                         }
423
424                         for (int ix=0; ix < numUnique; ix++) {
425                                 eis[ix] += exp(lxk[ix] + state->priLogQarea[qx]);
426                         }
427                 }
428
429                 for (int px=0; px < numUnique; px++) {
430                         eis[px] = log(eis[px]);
431                         allSlxk[px] += eis[px];
432                 }
433         }
434 }
435
436 OMXINLINE static void
437 decodeLocation(long qx, const int dims, const long grid,
438                int *restrict quad)
439 {
440         for (int dx=0; dx < dims; dx++) {
441                 quad[dx] = qx % grid;
442                 qx = qx / grid;
443         }
444 }
445
446 static void
447 ba81Estep1(omxExpectation *oo) {
448         if(OMX_DEBUG_MML) {Rprintf("Beginning %s Computation.\n", NAME);}
449
450         omxBA81State *state = (omxBA81State*) oo->argStruct;
451         double *patternLik = state->patternLik;
452         int numUnique = state->numUnique;
453         int numSpecific = state->numSpecific;
454         double *ElatentMean = state->ElatentMean;
455         double *ElatentCov = state->ElatentCov;
456         int maxDims = state->maxDims;
457         int maxAbilities = state->maxAbilities;
458         int primaryDims = maxDims;
459
460         OMXZERO(patternLik, numUnique);
461         OMXZERO(ElatentMean, numUnique * maxAbilities);
462         OMXZERO(ElatentCov, numUnique * maxAbilities * maxAbilities);
463
464         // E-step, marginalize person ability
465         //
466         // Note: In the notation of Bock & Aitkin (1981) and
467         // Cai~(2010), these loops are reversed.  That is, the inner
468         // loop is over quadrature points and the outer loop is over
469         // all response patterns.
470         //
471         if (numSpecific == 0) {
472 #pragma omp parallel for num_threads(oo->currentState->numThreads)
473                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
474                         int quad[maxDims];
475                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
476                         double where[maxDims];
477                         pointToWhere(state, quad, where, maxDims);
478
479                         double *lxk = ba81Likelihood(oo, 0, quad);
480
481                         double logArea = state->priLogQarea[qx];
482 #pragma omp critical(EstepUpdate)
483                         for (int px=0; px < numUnique; px++) {
484                                 double tmp = exp(lxk[px] + logArea);
485 #if 0
486                                 if (!isfinite(tmp)) {
487                                         Rprintf("where\n");
488                                         pda(where, maxDims, 1);
489                                         error("Row %d lxk %f logArea %f tmp %f",
490                                               state->rowMap[px], lxk[px], logArea, tmp);
491                                 }
492 #endif
493                                 patternLik[px] += tmp;
494                                 mapLatentSpace(state, px, 0, tmp, where);
495                         }
496                 }
497         } else {
498                 primaryDims -= 1;
499                 int sDim = primaryDims;
500                 long specificPoints = state->quadGridSize;
501
502 #pragma omp parallel for num_threads(oo->currentState->numThreads)
503                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
504                         int quad[maxDims];
505                         decodeLocation(qx, primaryDims, state->quadGridSize, quad);
506
507                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
508                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
509                         cai2010(oo, TRUE, quad, allSlxk, Slxk);
510
511                         for (int sgroup=0; sgroup < numSpecific; sgroup++) {
512                                 double *eis = Slxk + numUnique * sgroup;
513                                 for (long sx=0; sx < specificPoints; sx++) {
514                                         quad[sDim] = sx;
515                                         double where[maxDims];
516                                         pointToWhere(state, quad, where, maxDims);
517                                         double logArea = logAreaProduct(state, quad, sgroup);
518                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
519                                         for (int px=0; px < numUnique; px++) {
520                                                 double tmp = exp((allSlxk[px] - eis[px]) + lxk[px] + logArea);
521                                                 mapLatentSpace(state, px, sgroup, tmp, where);
522                                         }
523                                 }
524                         }
525
526                         double priLogArea = state->priLogQarea[qx];
527 #pragma omp critical(EstepUpdate)
528                         for (int px=0; px < numUnique; px++) {
529                                 double tmp = exp(allSlxk[px] + priLogArea);
530                                 patternLik[px] += tmp;  // is it faster to make this line atomic? TODO
531                         }
532                 }
533         }
534
535         int *numIdentical = state->numIdentical;
536
537         if(0) {
538                 Rprintf("weight\n");
539                 for (int px=0; px < numUnique; px++) {
540                         double weight = numIdentical[px] / patternLik[px];
541                         Rprintf("%20.20f\n", weight);
542                 }
543
544                 Rprintf("per item mean\n");
545                 for (int px=0; px < numUnique; px++) {
546                         Rprintf("[%d] %20.20f\n", px, ElatentMean[px * maxAbilities]);
547                 }
548         }
549
550         for (int px=0; px < numUnique; px++) {
551                 if (patternLik[px] < MIN_PATTERNLIK) {
552                         patternLik[px] = MIN_PATTERNLIK;
553                         warning("Likelihood of pattern %d is 0, forcing to %.3g",
554                                 px, MIN_PATTERNLIK);
555                 }
556
557                 double weight = numIdentical[px] / patternLik[px];
558                 for (int d1=0; d1 < primaryDims; d1++) {
559                         ElatentMean[px * maxAbilities + d1] *= weight;
560                         for (int d2=0; d2 <= d1; d2++) {
561                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
562                                 ElatentCov[loc] *= weight;
563                         }
564                 }
565                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
566                         ElatentMean[px * maxAbilities + sdim] *= weight;
567                         int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
568                         ElatentCov[loc] *= weight;
569                 }
570 #if 0
571                 if (!isfinite(patternLik[px])) {
572                         error("Likelihood of row %d is %f", state->rowMap[px], patternLik[px]);
573                 }
574 #endif
575                 patternLik[px] = log(patternLik[px]);
576         }
577
578         for (int px=1; px < numUnique; px++) {
579                 for (int d1=0; d1 < primaryDims; d1++) {
580                         ElatentMean[d1] += ElatentMean[px * maxAbilities + d1];
581                         for (int d2=0; d2 <= d1; d2++) {
582                                 int cell = d2 * maxAbilities + d1;
583                                 int loc = px * maxAbilities * maxAbilities + cell;
584                                 ElatentCov[cell] += ElatentCov[loc];
585                         }
586                 }
587                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
588                         ElatentMean[sdim] += ElatentMean[px * maxAbilities + sdim];
589                         int cell = sdim * maxAbilities + sdim;
590                         int loc = px * maxAbilities * maxAbilities + cell;
591                         ElatentCov[cell] += ElatentCov[loc];
592                 }
593         }
594
595         //pda(ElatentMean, state->maxAbilities, 1);
596         //pda(ElatentCov, state->maxAbilities, state->maxAbilities);
597
598         omxData *data = state->data;
599         for (int d1=0; d1 < maxAbilities; d1++) {
600                 ElatentMean[d1] /= data->rows;
601         }
602
603         for (int d1=0; d1 < primaryDims; d1++) {
604                 for (int d2=0; d2 <= d1; d2++) {
605                         int cell = d2 * maxAbilities + d1;
606                         int tcell = d1 * maxAbilities + d2;
607                         ElatentCov[tcell] = ElatentCov[cell] =
608                                 ElatentCov[cell] / data->rows - ElatentMean[d1] * ElatentMean[d2];
609                 }
610         }
611         for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
612                 int cell = sdim * maxAbilities + sdim;
613                 ElatentCov[cell] = ElatentCov[cell] / data->rows - ElatentMean[sdim] * ElatentMean[sdim];
614         }
615
616         //Rprintf("E-step\n");
617         //pda(ElatentMean, state->maxAbilities, 1);
618         //pda(ElatentCov, state->maxAbilities, state->maxAbilities);
619 }
620
621 static void
622 schilling_bock_2005_rescale(omxExpectation *oo)
623 {
624         omxBA81State *state = (omxBA81State*) oo->argStruct;
625         omxMatrix *itemSpec = state->itemSpec;
626         omxMatrix *itemParam = state->itemParam;
627         omxMatrix *design = state->design;
628         double *ElatentMean = state->ElatentMean;
629         double *ElatentCov = state->ElatentCov;
630         double *tmpLatentMean = state->tmpLatentMean;
631         double *tmpLatentCov = state->tmpLatentCov;
632         int maxAbilities = state->maxAbilities;
633         int maxDims = state->maxDims;
634
635         //Rprintf("schilling bock\n");
636         //pda(ElatentMean, maxAbilities, 1);
637         //pda(ElatentCov, maxAbilities, maxAbilities);
638         //omxPrint(design, "design");
639
640         const char triangle = 'L';
641         F77_CALL(dpotrf)(&triangle, &maxAbilities, ElatentCov, &maxAbilities, &state->choleskyError);
642         if (state->choleskyError != 0) {
643                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
644                 return;
645         }
646
647         int numItems = itemParam->cols;
648         for (int ix=0; ix < numItems; ix++) {
649                 const double *spec = omxMatrixColumn(itemSpec, ix);
650                 int id = spec[RPF_ISpecID];
651                 const double *rawDesign = omxMatrixColumn(design, ix);
652                 int idesign[design->rows];
653                 int idx = 0;
654                 for (int dx=0; dx < design->rows; dx++) {
655                         if (isfinite(rawDesign[dx])) {
656                                 idesign[idx++] = rawDesign[dx]-1;
657                         } else {
658                                 idesign[idx++] = -1;
659                         }
660                 }
661                 for (int d1=0; d1 < idx; d1++) {
662                         if (idesign[d1] == -1) {
663                                 tmpLatentMean[d1] = 0;
664                         } else {
665                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
666                         }
667                         for (int d2=0; d2 <= d1; d2++) {
668                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
669                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
670                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
671                                 } else {
672                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
673                                 }
674                         }
675                 }
676                 if (1) {  // ease debugging, make optional TODO
677                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
678                         for (int d1=0; d1 < maxDims; d1++) {
679                                 for (int d2=0; d2 < maxDims; d2++) {
680                                         if (d1 < idx && d2 < idx) continue;
681                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
682                                 }
683                         }
684                 }
685                 double *iparam = omxMatrixColumn(itemParam, ix);
686                 int *mask = state->paramMap + state->derivPadSize * ix;
687                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
688         }
689
690         // This is an egregious hack :-)
691         omxState* currentState = oo->currentState;
692         int numFreeParams = currentState->numFreeParams;
693         for (int rx=0; rx < itemParam->rows; rx++) {
694                 for (int cx=0; cx < itemParam->cols; cx++) {
695                         int vx = state->paramMap[cx * state->derivPadSize + rx];
696                         if (vx >= 0 && vx < numFreeParams) {
697                                 // Look for gradient entries to determine what to copy.
698                                 // I said it was a hack, no?
699                                 updateFreeVar(currentState, currentState->freeVarList + vx,
700                                               omxMatrixElement(itemParam, rx, cx));
701                         }
702                 }
703         }
704 }
705
706 // Attempt G-H grid? http://dbarajassolano.wordpress.com/2012/01/26/on-sparse-grid-quadratures/
707 static void
708 ba81SetupQuadrature(omxExpectation* oo, int gridsize, int flat)
709 {
710         omxBA81State *state = (omxBA81State *) oo->argStruct;
711         int numUnique = state->numUnique;
712         int numThreads = getNumThreads(oo);
713         int maxDims = state->maxDims;
714         int Qwidth = state->Qwidth;
715         int numSpecific = state->numSpecific;
716         int priDims = maxDims - (numSpecific? 1 : 0);
717
718         // try starting small and increasing to the cap TODO
719         state->quadGridSize = gridsize;
720
721         state->totalQuadPoints = 1;
722         for (int dx=0; dx < maxDims; dx++) {
723                 state->totalQuadPoints *= state->quadGridSize;
724         }
725
726         state->totalPrimaryPoints = state->totalQuadPoints;
727
728         if (numSpecific) {
729                 state->totalPrimaryPoints /= state->quadGridSize;
730                 state->speLogQarea = Realloc(state->speLogQarea, state->quadGridSize * gridsize, double);
731         }
732
733         state->Qpoint = Realloc(state->Qpoint, state->quadGridSize, double);
734         state->priLogQarea = Realloc(state->priLogQarea, state->totalPrimaryPoints, double);
735
736         for (int px=0; px < state->quadGridSize; px ++) {
737                 state->Qpoint[px] = Qwidth - px * 2 * Qwidth / (state->quadGridSize-1);
738         }
739
740         if (flat) {
741                 // not sure why this is useful, remove? TODO
742                 double flatd = log(1) - log(state->totalPrimaryPoints);
743                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
744                         state->priLogQarea[qx] = flatd;
745                 }
746                 flatd = log(1) - log(state->quadGridSize);
747                 for (int sx=0; sx < numSpecific; sx++) {
748                         for (int qx=0; qx < state->quadGridSize; qx++) {
749                                 state->speLogQarea[ sx * state->quadGridSize + qx] = flatd;
750                         }
751                 }
752         } else {
753                 double totalArea = 0;
754                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
755                         int quad[priDims];
756                         decodeLocation(qx, priDims, state->quadGridSize, quad);
757                         double where[priDims];
758                         pointToWhere(state, quad, where, priDims);
759                         state->priLogQarea[qx] = dmvnorm(priDims, where,
760                                                          state->latentMeanOut->data,
761                                                          state->latentCovOut->data);
762                         totalArea += exp(state->priLogQarea[qx]);
763                 }
764                 totalArea = log(totalArea);
765                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
766                         state->priLogQarea[qx] -= totalArea;
767                         //Rprintf("%.5g,", state->priLogQarea[qx]);
768                 }
769                 //Rprintf("\n");
770
771                 for (int sx=0; sx < numSpecific; sx++) {
772                         totalArea = 0;
773                         for (int qx=0; qx < state->quadGridSize; qx++) {
774                                 int covCell = (priDims + sx) * state->maxAbilities + priDims + sx;
775                                 double den = dnorm(state->Qpoint[qx],
776                                                    state->latentMeanOut->data[priDims + sx],
777                                                    state->latentCovOut->data[covCell], TRUE);
778                                 state->speLogQarea[sx * state->quadGridSize + qx] = den;
779                                 totalArea += exp(den);
780                         }
781                         totalArea = log(totalArea);
782                         for (int qx=0; qx < state->quadGridSize; qx++) {
783                                 state->speLogQarea[sx * state->quadGridSize + qx] -= totalArea;
784                         }
785                 }
786         }
787
788         if (!state->cacheLXK) {
789                 state->lxk = Realloc(state->lxk, numUnique * numThreads, double);
790         } else {
791                 int ns = state->numSpecific;
792                 if (ns == 0) ns = 1;
793                 long numOrdinate = ns * state->totalQuadPoints;
794                 state->lxk = Realloc(state->lxk, numUnique * numOrdinate, double);
795         }
796
797         state->expected = Realloc(state->expected, state->totalOutcomes * state->totalQuadPoints, double);
798 }
799
800 OMXINLINE static void
801 updateLatentParam(omxExpectation* oo)
802 {
803         omxState* currentState = oo->currentState;
804         omxBA81State *state = (omxBA81State*) oo->argStruct;
805         int maxAbilities = state->maxAbilities;
806         int meanNum = state->latentMeanOut->matrixNumber;
807         int covNum = state->latentCovOut->matrixNumber;
808
809         int numFreeParams = currentState->numFreeParams;
810         for (int px=0; px < numFreeParams; px++) {
811                 omxFreeVar *fv = currentState->freeVarList + px;
812                 for (int lx=0; lx < fv->numLocations; lx++) {
813                         int matNum = ~fv->matrices[lx];
814                         if (matNum == meanNum) {
815                                 int dx = fv->row[lx] * fv->col[lx];
816                                 updateFreeVar(currentState, fv, state->ElatentMean[dx]);
817                         } else if (matNum == covNum) {
818                                 int cell = fv->col[lx] * maxAbilities + fv->row[lx];
819                                 updateFreeVar(currentState, fv, state->ElatentCov[cell]);
820                         }
821                 }
822         }
823
824         ba81SetupQuadrature(oo, state->targetQpoints, 0);
825 }
826
827 OMXINLINE static void
828 expectedUpdate(omxData *restrict data, const int *rowMap, const int px, const int item,
829                const double observed, const int outcomes, double *out)
830 {
831         int pick = omxIntDataElementUnsafe(data, rowMap[px], item);
832         if (pick == NA_INTEGER) {
833                 double slice = exp(observed - log(outcomes));
834                 for (int ox=0; ox < outcomes; ox++) {
835                         out[ox] += slice;
836                 }
837         } else {
838                 out[pick-1] += exp(observed);
839         }
840 }
841
842 OMXINLINE static void
843 ba81Expected(omxExpectation* oo)
844 {
845         omxState* currentState = oo->currentState;  // only used in #pragma omp
846         omxBA81State *state = (omxBA81State*) oo->argStruct;
847         omxData *data = state->data;
848         int numSpecific = state->numSpecific;
849         const int *rowMap = state->rowMap;
850         double *patternLik = state->patternLik;
851         double *logNumIdentical = state->logNumIdentical;
852         int numUnique = state->numUnique;
853         int maxDims = state->maxDims;
854         int numItems = state->itemParam->cols;
855         omxMatrix *itemSpec = state->itemSpec;
856         int totalOutcomes = state->totalOutcomes;
857
858         OMXZERO(state->expected, totalOutcomes * state->totalQuadPoints);
859
860         if (numSpecific == 0) {
861 #pragma omp parallel for num_threads(currentState->numThreads)
862                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
863                         int quad[maxDims];
864                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
865                         double *lxk = ba81LikelihoodFast(oo, 0, quad);
866                         for (int px=0; px < numUnique; px++) {
867                                 double *out = state->expected + qx * totalOutcomes;
868                                 double observed = logNumIdentical[px] + lxk[px] - patternLik[px];
869                                 for (int ix=0; ix < numItems; ix++) {
870                                         const double *spec = omxMatrixColumn(itemSpec, ix);
871                                         int outcomes = spec[RPF_ISpecOutcomes];
872                                         expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
873                                         out += outcomes;
874                                 }
875                         }
876                 }
877         } else {
878                 int sDim = state->maxDims-1;
879                 long specificPoints = state->quadGridSize;
880
881 #pragma omp parallel for num_threads(currentState->numThreads)
882                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
883                         int quad[maxDims];
884                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
885
886                         // allSlxk, Slxk only depend on the ordinate of the primary dimensions
887                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
888                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
889                         cai2010(oo, !state->cacheLXK, quad, allSlxk, Slxk);
890
891                         for (long sx=0; sx < specificPoints; sx++) {
892                                 quad[sDim] = sx;
893                                 long qloc = encodeLocation(state->maxDims, state->quadGridSize, quad);
894
895                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
896                                         double *eis = Slxk + numUnique * sgroup;
897                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
898
899                                         for (int px=0; px < numUnique; px++) {
900                                                 double *out = state->expected + totalOutcomes * qloc;
901
902                                                 for (int ix=0; ix < numItems; ix++) {
903                                                         const double *spec = omxMatrixColumn(itemSpec, ix);
904                                                         int outcomes = spec[RPF_ISpecOutcomes];
905                                                         if (state->Sgroup[ix] == sgroup) {
906                                                                 double observed = logNumIdentical[px] + (allSlxk[px] - eis[px]) +
907                                                                         (lxk[px] - patternLik[px]);
908                                                                 expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
909                                                         }
910                                                         out += outcomes;
911                                                 }
912                                         }
913                                 }
914                         }
915                 }
916         }
917         //pda(state->expected, state->totalOutcomes, state->totalQuadPoints);
918 }
919
920 static void
921 ba81Estep(omxExpectation *oo, enum ComputeExpectationContext ctx) {
922         if (ctx == COMPUTE_EXPECT_INITIALIZE) return;
923
924         omxBA81State *state = (omxBA81State*) oo->argStruct;
925         if (state->userEitemParam) {
926                 state->userEitemParam = FALSE;
927         } else {
928                 omxCopyMatrix(state->EitemParam, state->itemParam);
929         }
930         ba81Estep1(oo);
931         if (ctx == COMPUTE_EXPECT_PREFIT) {
932                 if (state->rescale) schilling_bock_2005_rescale(oo);
933         } else {
934                 updateLatentParam(oo);
935         }
936         ba81Expected(oo);
937 }
938
939 OMXINLINE static double
940 ba81Fit1Ordinate(omxExpectation* oo, const int *quad, const double *weight, int want)
941 {
942         omxBA81State *state = (omxBA81State*) oo->argStruct;
943         omxMatrix *itemSpec = state->itemSpec;
944         omxMatrix *itemParam = state->itemParam;
945         int numItems = itemParam->cols;
946         int maxOutcomes = state->maxOutcomes;
947         int maxDims = state->maxDims;
948         double *myDeriv = state->thrDeriv + itemParam->cols * state->derivPadSize * omx_absolute_thread_num();
949         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
950
951         double where[maxDims];
952         pointToWhere(state, quad, where, maxDims);
953
954         double *outcomeProb = computeRPF(oo, itemParam, quad); // avoid malloc/free? TODO
955         if (!outcomeProb) return 0;
956
957         double thr_ll = 0;
958         for (int ix=0; ix < numItems; ix++) {
959                 const double *spec = omxMatrixColumn(itemSpec, ix);
960                 int id = spec[RPF_ISpecID];
961                 int iOutcomes = spec[RPF_ISpecOutcomes];
962
963                 double area = exp(logAreaProduct(state, quad, state->Sgroup[ix]));   // avoid exp() here? TODO
964                 for (int ox=0; ox < iOutcomes; ox++) {
965 #if 0
966 #pragma omp critical(ba81Fit1OrdinateDebug1)
967                         if (!isfinite(outcomeProb[ix * maxOutcomes + ox])) {
968                                 pda(itemParam->data, itemParam->rows, itemParam->cols);
969                                 pda(outcomeProb, outcomes, numItems);
970                                 error("RPF produced NAs");
971                         }
972 #endif
973                         double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
974                         thr_ll += got * area;
975                 }
976
977                 if (do_deriv) {
978                         double *iparam = omxMatrixColumn(itemParam, ix);
979                         double *pad = myDeriv + ix * state->derivPadSize;
980                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
981                 }
982                 weight += iOutcomes;
983         }
984
985         Free(outcomeProb);
986
987         return thr_ll;
988 }
989
990 static double
991 ba81ComputeMFit1(omxExpectation* oo, int want, double *gradient, double *hessian)
992 {
993         omxBA81State *state = (omxBA81State*) oo->argStruct;
994         omxState* currentState = oo->currentState;  // only used in #pragma omp
995         omxMatrix *customPrior = state->customPrior;
996         omxMatrix *itemParam = state->itemParam;
997         omxMatrix *itemSpec = state->itemSpec;
998         int maxDims = state->maxDims;
999         const int totalOutcomes = state->totalOutcomes;
1000
1001         double ll = 0;
1002         if (customPrior) {
1003                 omxRecompute(customPrior);
1004                 ll = customPrior->data[0];
1005         }
1006
1007         if (!isfinite(ll)) {
1008                 omxPrint(itemParam, "item param");
1009                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
1010         }
1011
1012 #pragma omp parallel for num_threads(currentState->numThreads)
1013         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1014                 //double area = exp(state->priLogQarea[qx]);  // avoid exp() here? TODO
1015                 int quad[maxDims];
1016                 decodeLocation(qx, maxDims, state->quadGridSize, quad);
1017                 double *weight = state->expected + qx * totalOutcomes;
1018                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want);
1019                 
1020 #pragma omp atomic
1021                 ll += thr_ll;
1022         }
1023
1024         if (!customPrior && gradient) {
1025                 double *deriv0 = state->thrDeriv;
1026
1027                 int perThread = itemParam->cols * state->derivPadSize;
1028                 for (int th=1; th < getNumThreads(oo); th++) {
1029                         double *thrD = state->thrDeriv + th * perThread;
1030                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
1031                 }
1032
1033                 int numItems = itemParam->cols;
1034                 for (int ix=0; ix < numItems; ix++) {
1035                         const double *spec = omxMatrixColumn(itemSpec, ix);
1036                         int id = spec[RPF_ISpecID];
1037                         double *iparam = omxMatrixColumn(itemParam, ix);
1038                         double *pad = deriv0 + ix * state->derivPadSize;
1039                         (*rpf_model[id].dLL2)(spec, iparam, pad);
1040                 }
1041
1042                 int numFreeParams = currentState->numFreeParams;
1043                 int numParams = itemParam->cols * state->derivPadSize;
1044                 for (int ox=0; ox < numParams; ox++) {
1045                         int to = state->paramMap[ox];
1046                         if (to == -1) continue;
1047
1048                         // Need to check because this can happen if
1049                         // lbounds/ubounds are not set appropriately.
1050                         if (0 && !isfinite(deriv0[ox])) {
1051                                 int item = ox / itemParam->rows;
1052                                 Rprintf("item parameters:\n");
1053                                 const double *spec = omxMatrixColumn(itemSpec, item);
1054                                 int id = spec[RPF_ISpecID];
1055                                 int numParam = (*rpf_model[id].numParam)(spec);
1056                                 double *iparam = omxMatrixColumn(itemParam, item);
1057                                 pda(iparam, numParam, 1);
1058                                 // Perhaps bounds can be pulled in from librpf? TODO
1059                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
1060                                       ox, item, deriv0[ox]);
1061                         }
1062
1063                         if (to < numFreeParams) {
1064                                 gradient[to] -= deriv0[ox];
1065                         } else {
1066                                 if (hessian) hessian[to - numFreeParams] -= deriv0[ox];
1067                         }
1068                 }
1069         }
1070
1071         return -ll;
1072 }
1073
1074 double
1075 ba81ComputeFit(omxExpectation* oo, int want, double *gradient, double *hessian)
1076 {
1077         if (!want) return 0;
1078
1079         omxBA81State *state = (omxBA81State*) oo->argStruct;
1080         omxState* currentState = oo->currentState;
1081
1082         if (want & FF_COMPUTE_FIT) {
1083                 ++state->fitCount;
1084         }
1085
1086         if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
1087                 // M-step
1088                 ++state->gradientCount;
1089
1090                 int numFreeParams = currentState->numFreeParams;
1091                 OMXZERO(gradient, numFreeParams);
1092                 if (hessian) OMXZERO(hessian, numFreeParams * numFreeParams);
1093
1094                 omxMatrix *itemParam = state->itemParam;
1095                 OMXZERO(state->thrDeriv, state->derivPadSize * itemParam->cols * getNumThreads(oo));
1096
1097                 double got = ba81ComputeMFit1(oo, want, gradient, hessian);
1098                 state->lastEMLL = got;
1099                 return got;
1100         } else {
1101                 // Major EM iteration, note completely different LL calculation
1102
1103                 double *patternLik = state->patternLik;
1104                 int *numIdentical = state->numIdentical;
1105                 int numUnique = state->numUnique;
1106                 double got = 0;
1107                 for (int ux=0; ux < numUnique; ux++) {
1108                         got += numIdentical[ux] * patternLik[ux];
1109                 }
1110                 return -2 * got;
1111         }
1112 }
1113
1114 static double *
1115 realEAP(omxExpectation *oo)
1116 {
1117         // add openmp parallelization stuff TODO
1118
1119         omxBA81State *state = (omxBA81State *) oo->argStruct;
1120         int numSpecific = state->numSpecific;
1121         int maxDims = state->maxDims;
1122         int priDims = maxDims - (numSpecific? 1 : 0);
1123         int numUnique = state->numUnique;
1124         int maxAbilities = state->maxAbilities;
1125
1126         // TODO Wainer & Thissen. (1987). Estimating ability with the wrong
1127         // model. Journal of Educational Statistics, 12, 339-368.
1128
1129         int numQpoints = state->targetQpoints * 2;  // make configurable TODO
1130
1131         if (numQpoints < 1 + 2.0 * sqrt(state->itemSpec->cols)) {
1132                 // Thissen & Orlando (2001, p. 136)
1133                 warning("EAP requires at least 2*sqrt(items) quadrature points");
1134         }
1135
1136         ba81SetupQuadrature(oo, numQpoints, 0);
1137         ba81Estep1(oo);
1138
1139         /*
1140         double *cov = NULL;
1141         if (maxDims > 1) {
1142                 strcpy(out[2].label, "ability.cov");
1143                 out[2].numValues = -1;
1144                 out[2].rows = maxDims;
1145                 out[2].cols = maxDims;
1146                 out[2].values = (double*) R_alloc(out[2].rows * out[2].cols, sizeof(double));
1147                 cov = out[2].values;
1148                 OMXZERO(cov, out[2].rows * out[2].cols);
1149         }
1150         */
1151
1152         // Need a separate work space because the destination needs
1153         // to be in unsorted order with duplicated rows.
1154         double *ability = Calloc(numUnique * maxAbilities * 2, double);
1155
1156         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1157                 int quad[priDims];
1158                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1159                 double where[priDims];
1160                 pointToWhere(state, quad, where, priDims);
1161                 double logArea = state->priLogQarea[qx];
1162
1163                 double *lxk;
1164                 if (numSpecific == 0) {
1165                         lxk = ba81LikelihoodFast(oo, 0, quad);
1166                 } else {
1167                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
1168                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1169                         cai2010(oo, FALSE, quad, allSlxk, Slxk);
1170                         lxk = allSlxk;
1171                 }
1172
1173                 double *row = ability;
1174                 for (int px=0; px < numUnique; px++) {
1175                         double plik = exp(logArea + lxk[px]);
1176                         for (int dx=0; dx < priDims; dx++) {
1177                                 double piece = where[dx] * plik;
1178                                 row[dx*2] += piece;
1179                                 row[dx*2 + 1] += where[dx] * piece;
1180                                 // ignore cov, for now
1181                         }
1182                         row += 2 * maxAbilities;
1183                 }
1184         }
1185
1186         double *ris = Realloc(NULL, numUnique, double);
1187         for (int sx=0; sx < numSpecific; sx++) {
1188                 for (int sqx=0; sqx < state->quadGridSize; sqx++) {
1189                         double area = exp(state->speLogQarea[sx * state->quadGridSize + sqx]);
1190                         double ptArea = area * state->Qpoint[sqx];
1191                         OMXZERO(ris, numUnique);
1192                         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1193                                 int quad[maxDims];
1194                                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1195                                 quad[priDims] = sqx;
1196
1197                                 double *allSlxk = CALC_ALLSLXK(state, numUnique);
1198                                 double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1199                                 cai2010(oo, FALSE, quad, allSlxk, Slxk);
1200
1201                                 double *eis = Slxk + numUnique * sx;
1202                                 double *lxk = ba81LikelihoodFast(oo, sx, quad);
1203
1204                                 double logArea = state->priLogQarea[qx];
1205                                 for (int px=0; px < numUnique; px++) {
1206                                         ris[px] += exp(logArea + lxk[px] + allSlxk[px] - eis[px]);
1207                                 }
1208                         }
1209                         double *row = ability;
1210                         for (int px=0; px < numUnique; px++) {
1211                                 double piece = ris[px] * ptArea;
1212                                 row[(priDims + sx) * 2] += piece;
1213                                 row[(priDims + sx) * 2 + 1] += piece * state->Qpoint[sqx];
1214                                 row += 2 * maxAbilities;
1215                         }
1216                 }
1217         }
1218         Free(ris);
1219
1220         double *patternLik = state->patternLik;
1221         double *row = ability;
1222         for (int px=0; px < numUnique; px++) {
1223                 double denom = exp(patternLik[px]);
1224                 for (int ax=0; ax < maxAbilities; ax++) {
1225                         row[ax * 2] /= denom;
1226                         row[ax * 2 + 1] /= denom;
1227                         row[ax * 2 + 1] -= row[ax * 2] * row[ax * 2];
1228                 }
1229                 row += 2 * maxAbilities;
1230         }
1231
1232         /*
1233         // make symmetric
1234         for (int d1=0; d1 < maxDims; d1++) {
1235                 for (int d2=0; d2 < d1; d2++) {
1236                         cov[d2 * maxDims + d1] = cov[d1 * maxDims + d2];
1237                 }
1238         }
1239         */
1240
1241         for (int px=0; px < numUnique; px++) {
1242                 double *arow = ability + px * 2 * maxAbilities;
1243                 for (int dx=0; dx < maxAbilities; dx++) {
1244                         arow[dx*2+1] = sqrt(arow[dx*2+1]);
1245                 }
1246         }
1247
1248         return ability;
1249 }
1250
1251 /**
1252  * MAP is not affected by the number of items. EAP is. Likelihood can
1253  * get concentrated in a single quadrature ordinate. For 3PL, response
1254  * patterns can have a bimodal likelihood. This will confuse MAP and
1255  * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
1256  *
1257  * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
1258  * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
1259  * (pp 73-140). Lawrence Erlbaum Associates, Inc.
1260  */
1261 omxRListElement *
1262 ba81EAP(omxExpectation *oo, int *numReturns)   // rename to "return stuff to user"
1263 {
1264         omxBA81State *state = (omxBA81State *) oo->argStruct;
1265         int maxAbilities = state->maxAbilities;
1266
1267         *numReturns = 1 + (state->scores != SCORES_OMIT);
1268         omxRListElement *out = (omxRListElement*) R_alloc(*numReturns, sizeof(omxRListElement));
1269         int ox=0;
1270
1271         out[ox].numValues = 1;
1272         strcpy(out[ox].label, "EM.LL");
1273         out[ox].values = &state->lastEMLL;
1274         ++ox;
1275
1276         if (state->scores != SCORES_OMIT) {
1277                 double *ability = realEAP(oo);
1278                 int numUnique = state->numUnique;
1279                 omxData *data = state->data;
1280
1281                 int cols = state->scores == SCORES_FULL? data->rows : numUnique;
1282
1283                 strcpy(out[ox].label, "ability");
1284                 out[ox].numValues = -1;
1285                 out[ox].rows = 2 * maxAbilities;
1286                 out[ox].cols = cols;
1287                 out[ox].values = (double*) R_alloc(out[ox].rows * out[ox].cols, sizeof(double));
1288
1289                 if (state->scores == SCORES_FULL) {
1290                         for (int rx=0; rx < numUnique; rx++) {
1291                                 double *pa = ability + rx * 2 * maxAbilities;
1292
1293                                 int dups = omxDataNumIdenticalRows(state->data, state->rowMap[rx]);
1294                                 for (int dup=0; dup < dups; dup++) {
1295                                         int dest = omxDataIndex(data, state->rowMap[rx]+dup);
1296                                         memcpy(out[ox].values + dest * out[ox].rows, pa, sizeof(double) * 2 * maxAbilities);
1297                                 }
1298                         }
1299                 } else {
1300                         memcpy(out[ox].values, ability, sizeof(double) * numUnique * 2 * maxAbilities);
1301                 }
1302                 Free(ability);
1303                 ++ox;
1304         }
1305
1306         for (int ix=0; ix < state->itemParam->cols; ix++) {
1307                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1308                 int id = spec[RPF_ISpecID];
1309                 double *param = omxMatrixColumn(state->itemParam, ix);
1310                 rpf_model[id].postfit(spec, param);
1311         }
1312
1313         return out;
1314 }
1315
1316 static void ba81Destroy(omxExpectation *oo) {
1317         if(OMX_DEBUG) {
1318                 Rprintf("Freeing %s function.\n", NAME);
1319         }
1320         omxBA81State *state = (omxBA81State *) oo->argStruct;
1321         //Rprintf("fit %d gradient %d\n", state->fitCount, state->gradientCount);
1322         omxFreeAllMatrixData(state->itemSpec);
1323         omxFreeAllMatrixData(state->itemParam);
1324         omxFreeAllMatrixData(state->EitemParam);
1325         omxFreeAllMatrixData(state->design);
1326         omxFreeAllMatrixData(state->customPrior);
1327         omxFreeAllMatrixData(state->latentMeanOut);
1328         omxFreeAllMatrixData(state->latentCovOut);
1329         Free(state->logNumIdentical);
1330         Free(state->numIdentical);
1331         Free(state->Qpoint);
1332         Free(state->priLogQarea);
1333         Free(state->rowMap);
1334         Free(state->patternLik);
1335         Free(state->lxk);
1336         Free(state->Slxk);
1337         Free(state->allSlxk);
1338         Free(state->Sgroup);
1339         Free(state->expected);
1340         Free(state->paramMap);
1341         Free(state->thrDeriv);
1342         Free(state->ElatentMean);
1343         Free(state->ElatentCov);
1344         Free(state->tmpLatentMean);
1345         Free(state->tmpLatentCov);
1346         Free(state);
1347 }
1348
1349 void getMatrixDims(SEXP r_theta, int *rows, int *cols)
1350 {
1351     SEXP matrixDims;
1352     PROTECT(matrixDims = getAttrib(r_theta, R_DimSymbol));
1353     int *dimList = INTEGER(matrixDims);
1354     *rows = dimList[0];
1355     *cols = dimList[1];
1356     UNPROTECT(1);
1357 }
1358
1359 void omxInitExpectationBA81(omxExpectation* oo) {
1360         omxState* currentState = oo->currentState;      
1361         SEXP rObj = oo->rObj;
1362         SEXP tmp;
1363         
1364         if(OMX_DEBUG) {
1365                 Rprintf("Initializing %s.\n", NAME);
1366         }
1367         if (!rpf_model) {
1368                 const int wantVersion = 3;
1369                 int version;
1370                 get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
1371                 (*get_librpf)(&version, &rpf_numModels, &rpf_model);
1372                 if (version < wantVersion) error("librpf binary API %d installed, at least %d is required",
1373                                                  version, wantVersion);
1374         }
1375         
1376         omxBA81State *state = Calloc(1, omxBA81State);
1377         oo->argStruct = (void*) state;
1378
1379         PROTECT(tmp = GET_SLOT(rObj, install("data")));
1380         state->data = omxDataLookupFromState(tmp, currentState);
1381
1382         if (strcmp(omxDataType(state->data), "raw") != 0) {
1383                 omxRaiseErrorf(currentState, "%s unable to handle data type %s", NAME, omxDataType(state->data));
1384                 return;
1385         }
1386
1387         state->itemSpec =
1388                 omxNewMatrixFromSlot(rObj, currentState, "ItemSpec");
1389         state->design =
1390                 omxNewMatrixFromSlot(rObj, currentState, "Design");
1391         state->itemParam =
1392                 omxNewMatrixFromSlot(rObj, currentState, "ItemParam");
1393         state->latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean");
1394         if (!state->latentMeanOut) error("Failed to retrieve mean matrix");
1395         state->latentCovOut  = omxNewMatrixFromSlot(rObj, currentState, "cov");
1396         if (!state->latentCovOut) error("Failed to retrieve cov matrix");
1397
1398         state->EitemParam =
1399                 omxNewMatrixFromSlot(rObj, currentState, "EItemParam");
1400         if (state->EitemParam) {
1401                 state->userEitemParam = TRUE;
1402         } else {
1403                 state->EitemParam =
1404                         omxInitTemporaryMatrix(NULL, state->itemParam->rows, state->itemParam->cols,
1405                                                TRUE, currentState);
1406         }
1407
1408         state->customPrior =
1409                 omxNewMatrixFromSlot(rObj, currentState, "CustomPrior");
1410         
1411         oo->computeFun = ba81Estep;
1412         oo->destructFun = ba81Destroy;
1413         
1414         // TODO: Exactly identical rows do not contribute any information.
1415         // The sorting algorithm ought to remove them so we don't waste RAM.
1416         // The following summary stats would be cheaper to calculate too.
1417
1418         int numUnique = 0;
1419         omxData *data = state->data;
1420         if (omxDataNumFactor(data) != data->cols) {
1421                 // verify they are ordered factors TODO
1422                 omxRaiseErrorf(currentState, "%s: all columns must be factors", NAME);
1423                 return;
1424         }
1425
1426         for (int rx=0; rx < data->rows;) {
1427                 rx += omxDataNumIdenticalRows(state->data, rx);
1428                 ++numUnique;
1429         }
1430         state->numUnique = numUnique;
1431
1432         state->rowMap = Realloc(NULL, numUnique, int);
1433         state->numIdentical = Realloc(NULL, numUnique, int);
1434         state->logNumIdentical = Realloc(NULL, numUnique, double);
1435
1436         int numItems = state->itemParam->cols;
1437         if (data->cols != numItems) {
1438                 error("Data has %d columns for %d items", data->cols, numItems);
1439         }
1440
1441         for (int rx=0, ux=0; rx < data->rows; ux++) {
1442                 if (rx == 0) {
1443                         // all NA rows will sort to the top
1444                         int na=0;
1445                         for (int ix=0; ix < numItems; ix++) {
1446                                 if (omxIntDataElement(data, 0, ix) == NA_INTEGER) { ++na; }
1447                         }
1448                         if (na == numItems) {
1449                                 omxRaiseErrorf(currentState, "Remove rows with all NAs");
1450                                 return;
1451                         }
1452                 }
1453                 int dups = omxDataNumIdenticalRows(state->data, rx);
1454                 state->numIdentical[ux] = dups;
1455                 state->logNumIdentical[ux] = log(dups);
1456                 state->rowMap[ux] = rx;
1457                 rx += dups;
1458         }
1459
1460         state->patternLik = Realloc(NULL, numUnique, double);
1461
1462         int numThreads = getNumThreads(oo);
1463
1464         int maxSpec = 0;
1465         int maxParam = 0;
1466         state->maxDims = 0;
1467         state->maxOutcomes = 0;
1468
1469         for (int ix=0; ix < numItems; ix++) {
1470                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1471                 int id = spec[RPF_ISpecID];
1472                 if (id < 0 || id >= rpf_numModels) {
1473                         omxRaiseErrorf(currentState, "ItemSpec column %d has unknown item model %d", ix, id);
1474                         return;
1475                 }
1476
1477                 double *param = omxMatrixColumn(state->itemParam, ix);
1478                 rpf_model[id].prefit(spec, param);
1479         }
1480
1481         int totalOutcomes = 0;
1482         for (int cx = 0; cx < data->cols; cx++) {
1483                 const double *spec = omxMatrixColumn(state->itemSpec, cx);
1484                 int id = spec[RPF_ISpecID];
1485                 int dims = spec[RPF_ISpecDims];
1486                 if (state->maxDims < dims)
1487                         state->maxDims = dims;
1488
1489                 int no = spec[RPF_ISpecOutcomes];
1490                 totalOutcomes += no;
1491                 if (state->maxOutcomes < no)
1492                         state->maxOutcomes = no;
1493
1494                 // TODO this summary stat should be available from omxData
1495                 int dataMax=0;
1496                 for (int rx=0; rx < data->rows; rx++) {
1497                         int pick = omxIntDataElementUnsafe(data, rx, cx);
1498                         if (dataMax < pick)
1499                                 dataMax = pick;
1500                 }
1501                 if (dataMax > no) {
1502                         error("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
1503                 } else if (dataMax < no) {
1504                         warning("Data for item %d has only %d outcomes, not %d", cx+1, dataMax, no);
1505                         // promote to error?
1506                         // should complain if an outcome is not represented in the data TODO
1507                 }
1508
1509                 int numSpec = (*rpf_model[id].numSpec)(spec);
1510                 if (maxSpec < numSpec)
1511                         maxSpec = numSpec;
1512
1513                 int numParam = (*rpf_model[id].numParam)(spec);
1514                 if (maxParam < numParam)
1515                         maxParam = numParam;
1516         }
1517
1518         state->totalOutcomes = totalOutcomes;
1519
1520         if (state->itemSpec->cols != data->cols || state->itemSpec->rows != maxSpec) {
1521                 omxRaiseErrorf(currentState, "ItemSpec must have %d item columns and %d rows",
1522                                data->cols, maxSpec);
1523                 return;
1524         }
1525         if (state->itemParam->rows != maxParam) {
1526                 omxRaiseErrorf(currentState, "ItemParam should have %d rows", maxParam);
1527                 return;
1528         }
1529
1530         state->derivPadSize = maxParam + maxParam*(1+maxParam)/2;
1531
1532         if (state->design == NULL) {
1533                 state->maxAbilities = state->maxDims;
1534                 state->design = omxInitTemporaryMatrix(NULL, state->maxDims, numItems,
1535                                        TRUE, currentState);
1536                 for (int ix=0; ix < numItems; ix++) {
1537                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1538                         int dims = spec[RPF_ISpecDims];
1539                         for (int dx=0; dx < state->maxDims; dx++) {
1540                                 omxSetMatrixElement(state->design, dx, ix, dx < dims? (double)dx+1 : nan(""));
1541                         }
1542                 }
1543         } else {
1544                 omxMatrix *design = state->design;
1545                 if (design->cols != numItems ||
1546                     design->rows != state->maxDims) {
1547                         omxRaiseErrorf(currentState, "Design matrix should have %d rows and %d columns",
1548                                        state->maxDims, numItems);
1549                         return;
1550                 }
1551
1552                 state->maxAbilities = 0;
1553                 for (int ix=0; ix < design->rows * design->cols; ix++) {
1554                         double got = design->data[ix];
1555                         if (!R_FINITE(got)) continue;
1556                         if (round(got) != (int)got) error("Design matrix can only contain integers"); // TODO better way?
1557                         if (state->maxAbilities < got)
1558                                 state->maxAbilities = got;
1559                 }
1560                 for (int ix=0; ix < design->cols; ix++) {
1561                         const double *idesign = omxMatrixColumn(design, ix);
1562                         int ddim = 0;
1563                         for (int rx=0; rx < design->rows; rx++) {
1564                                 if (isfinite(idesign[rx])) ddim += 1;
1565                         }
1566                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1567                         int dims = spec[RPF_ISpecDims];
1568                         if (ddim > dims) error("Item %d has %d dims but design assigns %d", ix, dims, ddim);
1569                 }
1570         }
1571         if (state->maxAbilities <= state->maxDims) {
1572                 state->Sgroup = Calloc(numItems, int);
1573         } else {
1574                 // Not sure if this is correct, revisit TODO
1575                 int Sgroup0 = -1;
1576                 state->Sgroup = Realloc(NULL, numItems, int);
1577                 for (int dx=0; dx < state->maxDims; dx++) {
1578                         for (int ix=0; ix < numItems; ix++) {
1579                                 int ability = omxMatrixElement(state->design, dx, ix);
1580                                 if (dx < state->maxDims - 1) {
1581                                         if (Sgroup0 <= ability)
1582                                                 Sgroup0 = ability+1;
1583                                         continue;
1584                                 }
1585                                 int ss=-1;
1586                                 if (ability >= Sgroup0) {
1587                                         if (ss == -1) {
1588                                                 ss = ability;
1589                                         } else {
1590                                                 omxRaiseErrorf(currentState, "Item %d cannot belong to more than "
1591                                                                "1 specific dimension (both %d and %d)",
1592                                                                ix, ss, ability);
1593                                                 return;
1594                                         }
1595                                 }
1596                                 if (ss == -1) ss = Sgroup0;
1597                                 state->Sgroup[ix] = ss - Sgroup0;
1598                         }
1599                 }
1600                 state->numSpecific = state->maxAbilities - state->maxDims + 1;
1601                 state->allSlxk = Realloc(NULL, numUnique * numThreads, double);
1602                 state->Slxk = Realloc(NULL, numUnique * state->numSpecific * numThreads, double);
1603         }
1604
1605         if (state->latentMeanOut->rows * state->latentMeanOut->cols != state->maxAbilities) {
1606                 error("The mean matrix '%s' must be 1x%d or %dx1", state->latentMeanOut->name,
1607                       state->maxAbilities, state->maxAbilities);
1608         }
1609         if (state->latentCovOut->rows != state->maxAbilities ||
1610             state->latentCovOut->cols != state->maxAbilities) {
1611                 error("The cov matrix '%s' must be %dx%d",
1612                       state->latentCovOut->name, state->maxAbilities, state->maxAbilities);
1613         }
1614
1615         PROTECT(tmp = GET_SLOT(rObj, install("cache")));
1616         state->cacheLXK = asLogical(tmp);
1617
1618         PROTECT(tmp = GET_SLOT(rObj, install("rescale")));
1619         state->rescale = asLogical(tmp);
1620
1621         PROTECT(tmp = GET_SLOT(rObj, install("qpoints")));
1622         state->targetQpoints = asReal(tmp);
1623
1624         PROTECT(tmp = GET_SLOT(rObj, install("qwidth")));
1625         state->Qwidth = asReal(tmp);
1626
1627         PROTECT(tmp = GET_SLOT(rObj, install("scores")));
1628         const char *score_option = CHAR(asChar(tmp));
1629         if (strcmp(score_option, "omit")==0) state->scores = SCORES_OMIT;
1630         if (strcmp(score_option, "unique")==0) state->scores = SCORES_UNIQUE;
1631         if (strcmp(score_option, "full")==0) state->scores = SCORES_FULL;
1632
1633         state->ElatentMean = Realloc(NULL, state->maxAbilities * numUnique, double);
1634         state->ElatentCov = Realloc(NULL, state->maxAbilities * state->maxAbilities * numUnique, double);
1635         state->tmpLatentMean = Realloc(NULL, state->maxDims, double);
1636         state->tmpLatentCov = Realloc(NULL, state->maxDims * state->maxDims, double);
1637
1638         ba81SetupQuadrature(oo, state->targetQpoints, 0);
1639
1640         buildParamMap(oo);
1641
1642         // verify data bounded between 1 and numOutcomes TODO
1643         // hm, looks like something could be added to omxData for column summary stats?
1644 }