Fix compile errors after merge
[openmx:openmx.git] / src / omxExpectationBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 // Consider replacing log() with log2() in some places? Not worth it?
19
20 #include <Rmath.h>
21 #include "omxExpectation.h"
22 #include "omxOpenmpWrap.h"
23 #include "npsolWrap.h"
24 #include "libirt-rpf.h"
25 #include "dmvnorm.h"
26
27 static const char *NAME = "ExpectationBA81";
28
29 static const struct rpf *rpf_model = NULL;
30 static int rpf_numModels;
31 static const double MIN_PATTERNLIK = 1e-100;
32
33 enum score_option {
34         SCORES_OMIT,
35         SCORES_UNIQUE,
36         SCORES_FULL
37 };
38
39 typedef struct {
40
41         // data characteristics
42         omxData *data;
43         int numUnique;
44         int *numIdentical;        // length numUnique
45         double *logNumIdentical;  // length numUnique
46         int *rowMap;              // length numUnique
47
48         // item description related
49         omxMatrix *itemSpec;
50         int maxOutcomes;
51         int maxDims;
52         int maxAbilities;
53         int numSpecific;
54         int *Sgroup;              // item's specific group 0..numSpecific-1
55         omxMatrix *design;        // items * maxDims
56
57         // quadrature related
58         double Qwidth;
59         double targetQpoints;
60         long quadGridSize;
61         long totalQuadPoints;     // quadGridSize ^ maxDims
62         long totalPrimaryPoints;  // totalQuadPoints except for specific dim TODO
63         double *Qpoint;           // quadGridSize
64         double *priLogQarea;      // totalPrimaryPoints
65         double *speLogQarea;      // quadGridSize * numSpecific
66
67         // estimation related
68         omxMatrix *EitemParam;    // E step version
69         int userEitemParam;
70         int rescale;
71         omxMatrix *itemParam;     // M step version
72         omxMatrix *customPrior;   // TODO remove?
73         int derivPadSize;         // maxParam + maxParam*(1+maxParam)/2
74         double *thrDeriv;         // itemParam->cols * derivPadSize * thread
75         int *paramMap;            // itemParam->cols * derivPadSize -> index of free parameter
76         int cacheLXK;
77         double *lxk;              // wo/cache, numUnique * thread
78         double *allSlxk;          // numUnique * thread
79         double *Slxk;             // numUnique * #specific dimensions * thread
80         double *patternLik;       // numUnique
81         int totalOutcomes;
82         double *expected;         // totalOutcomes * totalQuadPoints
83         double *ElatentMean;      // maxAbilities * numUnique
84         double *ElatentCov;       // maxAbilities * maxAbilities * numUnique ; only lower triangle is used
85         double *tmpLatentMean;    // maxDims
86         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
87         omxMatrix *latentMeanOut;
88         omxMatrix *latentCovOut;
89         int choleskyError;
90
91         enum score_option scores;
92         int gradientCount;
93         int fitCount;
94         double lastEMLL;
95 } omxBA81State;
96
97 static void
98 pda(const double *ar, int rows, int cols) {   // column major order
99         std::string buf;
100         for (int rx=0; rx < rows; rx++) {
101                 for (int cx=0; cx < cols; cx++) {
102                         buf += string_snprintf("%.6g, ", ar[cx * rows + rx]);
103                 }
104                 buf += "\n";
105         }
106         mxLogBig(buf);
107 }
108
109 static void
110 pia(const int *ar, int rows, int cols) {   // column major order
111         std::string buf;
112         for (int rx=0; rx < rows; rx++) {
113                 for (int cx=0; cx < cols; cx++) {
114                         buf += string_snprintf("%d, ", ar[cx * rows + rx]);
115                 }
116                 buf += "\n";
117         }
118         mxLogBig(buf);
119 }
120
121 static int
122 getNumThreads(omxExpectation* oo)
123 {
124         int numThreads = Global->numThreads;
125         if (numThreads < 1) numThreads = 1;
126         return numThreads;
127 }
128
129 static void buildParamMap(omxExpectation* oo)
130 {
131         omxBA81State *state = (omxBA81State *) oo->argStruct;
132         omxMatrix *itemParam = state->itemParam;
133         int size = itemParam->cols * state->derivPadSize;
134
135         state->paramMap = Realloc(NULL, size, int);  // matrix location to free param index
136         for (int px=0; px < size; px++) {
137                 state->paramMap[px] = -1;
138         }
139
140         size_t numFreeParams = oo->freeVarGroup->vars.size();
141         int *pRow = Realloc(NULL, numFreeParams, int);
142         int *pCol = Realloc(NULL, numFreeParams, int);
143
144         for (size_t px=0; px < numFreeParams; px++) {
145                 pRow[px] = -1;
146                 pCol[px] = -1;
147                 omxFreeVar *fv = oo->freeVarGroup->vars[px];
148                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
149                         omxFreeVarLocation *loc = &fv->locations[lx];
150                         if (~loc->matrix == itemParam->matrixNumber) {
151                                 pRow[px] = loc->row;
152                                 pCol[px] = loc->col;
153                                 int at = pCol[px] * state->derivPadSize + pRow[px];
154                                 state->paramMap[at] = px;
155                         }
156                 }
157         }
158
159         for (size_t p1=0; p1 < numFreeParams; p1++) {
160                 for (size_t p2=p1; p2 < numFreeParams; p2++) {
161                         if (pCol[p1] == -1 || pCol[p1] != pCol[p2]) continue;
162                         const double *spec = omxMatrixColumn(state->itemSpec, pCol[p1]);
163                         int id = spec[RPF_ISpecID];
164                         int numParam = (*rpf_model[id].numParam)(spec);
165                         int r1 = pRow[p1];
166                         int r2 = pRow[p2];
167                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
168                         int rowOffset = 0;
169                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
170                         int at = pCol[p1] * state->derivPadSize + numParam + rowOffset + r1;
171                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
172                 }
173         }
174
175         Free(pRow);
176         Free(pCol);
177
178         state->thrDeriv = Realloc(NULL, itemParam->cols * state->derivPadSize * getNumThreads(oo), double);
179 }
180
181 OMXINLINE static void
182 pointToWhere(omxBA81State *state, const int *quad, double *where, int upto)
183 {
184         for (int dx=0; dx < upto; dx++) {
185                 where[dx] = state->Qpoint[quad[dx]];
186         }
187 }
188
189 OMXINLINE static void
190 assignDims(omxMatrix *itemSpec, omxMatrix *design, int dims, int maxDims, int ix,
191            const double *theta, double *ptheta)
192 {
193         for (int dx=0; dx < dims; dx++) {
194                 int ability = (int)omxMatrixElement(design, dx, ix) - 1;
195                 if (ability >= maxDims) ability = maxDims-1;
196                 ptheta[dx] = theta[ability];
197         }
198 }
199
200 /**
201  * \param theta Vector of ability parameters, one per ability
202  * \returns A numItems by maxOutcomes colMajor vector of doubles. Caller must Free it.
203  */
204 static double *
205 computeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
206 {
207         omxBA81State *state = (omxBA81State*) oo->argStruct;
208         omxMatrix *itemSpec = state->itemSpec;
209         int numItems = itemSpec->cols;
210         omxMatrix *design = state->design;
211         int maxDims = state->maxDims;
212
213         double theta[maxDims];
214         pointToWhere(state, quad, theta, maxDims);
215
216         double *outcomeProb = Realloc(NULL, numItems * state->maxOutcomes, double);
217         //double *outcomeProb = Calloc(numItems * state->maxOutcomes, double);
218
219         for (int ix=0; ix < numItems; ix++) {
220                 const double *spec = omxMatrixColumn(itemSpec, ix);
221                 double *iparam = omxMatrixColumn(itemParam, ix);
222                 double *out = outcomeProb + ix * state->maxOutcomes;
223                 int id = spec[RPF_ISpecID];
224                 int dims = spec[RPF_ISpecDims];
225                 double ptheta[dims];
226                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta);
227                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
228 #if 0
229                 for (int ox=0; ox < spec[RPF_ISpecOutcomes]; ox++) {
230                         if (!isfinite(out[ox]) || out[ox] > 0) {
231                                 mxLog("spec\n");
232                                 pda(spec, itemSpec->rows, 1);
233                                 mxLog("item param\n");
234                                 pda(iparam, itemParam->rows, 1);
235                                 mxLog("where\n");
236                                 pda(ptheta, dims, 1);
237                                 error("RPF returned %20.20f", out[ox]);
238                         }
239                 }
240 #endif
241         }
242
243         return outcomeProb;
244 }
245
246 OMXINLINE static long
247 encodeLocation(const int dims, const long grid, const int *quad)
248 {
249         long qx = 0;
250         for (int dx=dims-1; dx >= 0; dx--) {
251                 qx = qx * grid;
252                 qx += quad[dx];
253         }
254         return qx;
255 }
256
257 OMXINLINE static double *
258 getLXKcache(omxBA81State *state, const int *quad, const int specific)
259 {
260         long ordinate;
261         if (state->numSpecific == 0) {
262                 ordinate = encodeLocation(state->maxDims, state->quadGridSize, quad);
263         } else {
264                 ordinate = (specific * state->totalQuadPoints +
265                             encodeLocation(state->maxDims, state->quadGridSize, quad));
266         }
267         return state->lxk + state->numUnique * ordinate;
268 }
269
270 OMXINLINE static double *
271 ba81Likelihood(omxExpectation *oo, int specific, const int *quad)
272 {
273         omxBA81State *state = (omxBA81State*) oo->argStruct;
274         int numUnique = state->numUnique;
275         int maxOutcomes = state->maxOutcomes;
276         omxData *data = state->data;
277         int numItems = state->itemSpec->cols;
278         int *Sgroup = state->Sgroup;
279         double *lxk;
280
281         if (!state->cacheLXK) {
282                 lxk = state->lxk + numUnique * omx_absolute_thread_num();
283         } else {
284                 lxk = getLXKcache(state, quad, specific);
285         }
286
287         const double *outcomeProb = computeRPF(oo, state->EitemParam, quad);
288         if (!outcomeProb) {
289                 OMXZERO(lxk, numUnique);
290                 return lxk;
291         }
292
293         const int *rowMap = state->rowMap;
294         for (int px=0; px < numUnique; px++) {
295                 double lxk1 = 0;
296                 for (int ix=0; ix < numItems; ix++) {
297                         if (specific != Sgroup[ix]) continue;
298                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
299                         if (pick == NA_INTEGER) continue;
300                         double piece = outcomeProb[ix * maxOutcomes + pick-1];  // move -1 elsewhere TODO
301                         lxk1 += piece;
302                 }
303 #if 0
304 #pragma omp critical(ba81LikelihoodDebug1)
305                 if (!isfinite(lxk1) || lxk1 > numItems) {
306                         mxLog("where\n");
307                         double where[state->maxDims];
308                         pointToWhere(state, quad, where, state->maxDims);
309                         pda(where, state->maxDims, 1);
310                         mxLog("prob\n");
311                         pda(outcomeProb, numItems, maxOutcomes);
312                         error("Likelihood of row %d is %f", rowMap[px], lxk1);
313                 }
314 #endif
315                 lxk[px] = lxk1;
316         }
317
318         Free(outcomeProb);
319
320         return lxk;
321 }
322
323 OMXINLINE static double *
324 ba81LikelihoodFast(omxExpectation *oo, int specific, const int *quad)
325 {
326         omxBA81State *state = (omxBA81State*) oo->argStruct;
327         if (!state->cacheLXK) {
328                 return ba81Likelihood(oo, specific, quad);
329         } else {
330                 return getLXKcache(state, quad, specific);
331         }
332
333 }
334
335 OMXINLINE static void
336 mapLatentSpace(omxBA81State *state, int px, int sgroup, double piece, const double *where)
337 {
338         double *ElatentMean = state->ElatentMean;
339         double *ElatentCov = state->ElatentCov;
340         int maxDims = state->maxDims;
341         int maxAbilities = state->maxAbilities;
342         int pmax = maxDims;
343         if (state->numSpecific) pmax -= 1;
344
345         if (sgroup == 0) {
346                 for (int d1=0; d1 < pmax; d1++) {
347                         double piece_w1 = piece * where[d1];
348                         int mloc = px * maxAbilities + d1;
349 #pragma omp atomic
350                         ElatentMean[mloc] += piece_w1;
351                         for (int d2=0; d2 <= d1; d2++) {
352                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
353                                 double piece_cov = piece_w1 * where[d2];
354 #pragma omp atomic
355                                 ElatentCov[loc] += piece_cov;
356                         }
357                 }
358         }
359
360         if (state->numSpecific) {
361                 int sdim = maxDims + sgroup - 1;
362
363                 double piece_w1 = piece * where[maxDims-1];
364                 int mloc = px * maxAbilities + sdim;
365 #pragma omp atomic
366                 ElatentMean[mloc] += piece_w1;
367
368                 int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
369                 double piece_var = piece_w1 * where[maxDims-1];
370 #pragma omp atomic
371                 ElatentCov[loc] += piece_var;
372         }
373 }
374
375 OMXINLINE static double
376 logAreaProduct(omxBA81State *state, const int *quad, const int sg)
377 {
378         int maxDims = state->maxDims;
379         if (state->numSpecific == 0) {
380                 long qloc = encodeLocation(maxDims, state->quadGridSize, quad);
381                 return state->priLogQarea[qloc];
382         } else {
383                 long priloc = encodeLocation(maxDims-1, state->quadGridSize, quad);
384                 return (state->priLogQarea[priloc] +
385                         state->speLogQarea[sg * state->quadGridSize + quad[maxDims - 1]]);
386         }
387 }
388
389 #define CALC_ALLSLXK(state, numUnique) \
390         (state->allSlxk + omx_absolute_thread_num() * (numUnique))
391
392 #define CALC_SLXK(state, numUnique, numSpecific) \
393         (state->Slxk + omx_absolute_thread_num() * (numUnique) * (numSpecific))
394
395 OMXINLINE static void
396 cai2010(omxExpectation* oo, int recompute, const int *primaryQuad,
397         double *allSlxk, double *Slxk)
398 {
399         omxBA81State *state = (omxBA81State*) oo->argStruct;
400         int numUnique = state->numUnique;
401         int numSpecific = state->numSpecific;
402         int maxDims = state->maxDims;
403         int sDim = maxDims-1;
404
405         int quad[maxDims];
406         memcpy(quad, primaryQuad, sizeof(int)*sDim);
407
408         OMXZERO(Slxk, numUnique * numSpecific);
409         OMXZERO(allSlxk, numUnique);
410
411         for (int sx=0; sx < numSpecific; sx++) {
412                 double *eis = Slxk + numUnique * sx;
413                 int quadGridSize = state->quadGridSize;
414
415                 for (int qx=0; qx < quadGridSize; qx++) {
416                         quad[sDim] = qx;
417                         double where[maxDims];
418                         pointToWhere(state, quad, where, maxDims);
419
420                         double *lxk;
421                         if (recompute) {
422                                 lxk = ba81Likelihood(oo, sx, quad);
423                         } else {
424                                 lxk = getLXKcache(state, quad, sx);
425                         }
426
427                         for (int ix=0; ix < numUnique; ix++) {
428                                 eis[ix] += exp(lxk[ix] + state->priLogQarea[qx]);
429                         }
430                 }
431
432                 for (int px=0; px < numUnique; px++) {
433                         eis[px] = log(eis[px]);
434                         allSlxk[px] += eis[px];
435                 }
436         }
437 }
438
439 OMXINLINE static void
440 decodeLocation(long qx, const int dims, const long grid,
441                int *quad)
442 {
443         for (int dx=0; dx < dims; dx++) {
444                 quad[dx] = qx % grid;
445                 qx = qx / grid;
446         }
447 }
448
449 static void
450 ba81Estep1(omxExpectation *oo) {
451         if(OMX_DEBUG) {mxLog("Beginning %s Computation.\n", NAME);}
452
453         omxBA81State *state = (omxBA81State*) oo->argStruct;
454         double *patternLik = state->patternLik;
455         int numUnique = state->numUnique;
456         int numSpecific = state->numSpecific;
457         double *ElatentMean = state->ElatentMean;
458         double *ElatentCov = state->ElatentCov;
459         int maxDims = state->maxDims;
460         int maxAbilities = state->maxAbilities;
461         int primaryDims = maxDims;
462
463         OMXZERO(patternLik, numUnique);
464         OMXZERO(ElatentMean, numUnique * maxAbilities);
465         OMXZERO(ElatentCov, numUnique * maxAbilities * maxAbilities);
466
467         // E-step, marginalize person ability
468         //
469         // Note: In the notation of Bock & Aitkin (1981) and
470         // Cai~(2010), these loops are reversed.  That is, the inner
471         // loop is over quadrature points and the outer loop is over
472         // all response patterns.
473         //
474         if (numSpecific == 0) {
475 #pragma omp parallel for num_threads(Global->numThreads)
476                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
477                         int quad[maxDims];
478                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
479                         double where[maxDims];
480                         pointToWhere(state, quad, where, maxDims);
481
482                         double *lxk = ba81Likelihood(oo, 0, quad);
483
484                         double logArea = state->priLogQarea[qx];
485 #pragma omp critical(EstepUpdate)
486                         for (int px=0; px < numUnique; px++) {
487                                 double tmp = exp(lxk[px] + logArea);
488 #if 0
489                                 if (!isfinite(tmp)) {
490                                         mxLog("where\n");
491                                         pda(where, maxDims, 1);
492                                         error("Row %d lxk %f logArea %f tmp %f",
493                                               state->rowMap[px], lxk[px], logArea, tmp);
494                                 }
495 #endif
496                                 patternLik[px] += tmp;
497                                 mapLatentSpace(state, px, 0, tmp, where);
498                         }
499                 }
500         } else {
501                 primaryDims -= 1;
502                 int sDim = primaryDims;
503                 long specificPoints = state->quadGridSize;
504
505 #pragma omp parallel for num_threads(Global->numThreads)
506                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
507                         int quad[maxDims];
508                         decodeLocation(qx, primaryDims, state->quadGridSize, quad);
509
510                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
511                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
512                         cai2010(oo, TRUE, quad, allSlxk, Slxk);
513
514                         for (int sgroup=0; sgroup < numSpecific; sgroup++) {
515                                 double *eis = Slxk + numUnique * sgroup;
516                                 for (long sx=0; sx < specificPoints; sx++) {
517                                         quad[sDim] = sx;
518                                         double where[maxDims];
519                                         pointToWhere(state, quad, where, maxDims);
520                                         double logArea = logAreaProduct(state, quad, sgroup);
521                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
522                                         for (int px=0; px < numUnique; px++) {
523                                                 double tmp = exp((allSlxk[px] - eis[px]) + lxk[px] + logArea);
524                                                 mapLatentSpace(state, px, sgroup, tmp, where);
525                                         }
526                                 }
527                         }
528
529                         double priLogArea = state->priLogQarea[qx];
530 #pragma omp critical(EstepUpdate)
531                         for (int px=0; px < numUnique; px++) {
532                                 double tmp = exp(allSlxk[px] + priLogArea);
533                                 patternLik[px] += tmp;  // is it faster to make this line atomic? TODO
534                         }
535                 }
536         }
537
538         int *numIdentical = state->numIdentical;
539
540         if(0) {
541                 mxLog("weight\n");
542                 for (int px=0; px < numUnique; px++) {
543                         double weight = numIdentical[px] / patternLik[px];
544                         mxLog("%20.20f\n", weight);
545                 }
546
547                 mxLog("per item mean\n");
548                 for (int px=0; px < numUnique; px++) {
549                         mxLog("[%d] %20.20f\n", px, ElatentMean[px * maxAbilities]);
550                 }
551         }
552
553         for (int px=0; px < numUnique; px++) {
554                 if (patternLik[px] < MIN_PATTERNLIK) {
555                         patternLik[px] = MIN_PATTERNLIK;
556                         warning("Likelihood of pattern %d is 0, forcing to %.3g",
557                                 px, MIN_PATTERNLIK);
558                 }
559
560                 double weight = numIdentical[px] / patternLik[px];
561                 for (int d1=0; d1 < primaryDims; d1++) {
562                         ElatentMean[px * maxAbilities + d1] *= weight;
563                         for (int d2=0; d2 <= d1; d2++) {
564                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
565                                 ElatentCov[loc] *= weight;
566                         }
567                 }
568                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
569                         ElatentMean[px * maxAbilities + sdim] *= weight;
570                         int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
571                         ElatentCov[loc] *= weight;
572                 }
573 #if 0
574                 if (!isfinite(patternLik[px])) {
575                         error("Likelihood of row %d is %f", state->rowMap[px], patternLik[px]);
576                 }
577 #endif
578                 patternLik[px] = log(patternLik[px]);
579         }
580
581         for (int px=1; px < numUnique; px++) {
582                 for (int d1=0; d1 < primaryDims; d1++) {
583                         ElatentMean[d1] += ElatentMean[px * maxAbilities + d1];
584                         for (int d2=0; d2 <= d1; d2++) {
585                                 int cell = d2 * maxAbilities + d1;
586                                 int loc = px * maxAbilities * maxAbilities + cell;
587                                 ElatentCov[cell] += ElatentCov[loc];
588                         }
589                 }
590                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
591                         ElatentMean[sdim] += ElatentMean[px * maxAbilities + sdim];
592                         int cell = sdim * maxAbilities + sdim;
593                         int loc = px * maxAbilities * maxAbilities + cell;
594                         ElatentCov[cell] += ElatentCov[loc];
595                 }
596         }
597
598         //pda(ElatentMean, state->maxAbilities, 1);
599         //pda(ElatentCov, state->maxAbilities, state->maxAbilities);
600
601         omxData *data = state->data;
602         for (int d1=0; d1 < maxAbilities; d1++) {
603                 ElatentMean[d1] /= data->rows;
604         }
605
606         for (int d1=0; d1 < primaryDims; d1++) {
607                 for (int d2=0; d2 <= d1; d2++) {
608                         int cell = d2 * maxAbilities + d1;
609                         int tcell = d1 * maxAbilities + d2;
610                         ElatentCov[tcell] = ElatentCov[cell] =
611                                 ElatentCov[cell] / data->rows - ElatentMean[d1] * ElatentMean[d2];
612                 }
613         }
614         for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
615                 int cell = sdim * maxAbilities + sdim;
616                 ElatentCov[cell] = ElatentCov[cell] / data->rows - ElatentMean[sdim] * ElatentMean[sdim];
617         }
618
619         //mxLog("E-step\n");
620         //pda(ElatentMean, state->maxAbilities, 1);
621         //pda(ElatentCov, state->maxAbilities, state->maxAbilities);
622 }
623
624 static void
625 schilling_bock_2005_rescale(omxExpectation *oo)
626 {
627         omxBA81State *state = (omxBA81State*) oo->argStruct;
628         omxMatrix *itemSpec = state->itemSpec;
629         omxMatrix *itemParam = state->itemParam;
630         omxMatrix *design = state->design;
631         double *ElatentMean = state->ElatentMean;
632         double *ElatentCov = state->ElatentCov;
633         double *tmpLatentMean = state->tmpLatentMean;
634         double *tmpLatentCov = state->tmpLatentCov;
635         int maxAbilities = state->maxAbilities;
636         int maxDims = state->maxDims;
637
638         //mxLog("schilling bock\n");
639         //pda(ElatentMean, maxAbilities, 1);
640         //pda(ElatentCov, maxAbilities, maxAbilities);
641         //omxPrint(design, "design");
642
643         const char triangle = 'L';
644         F77_CALL(dpotrf)(&triangle, &maxAbilities, ElatentCov, &maxAbilities, &state->choleskyError);
645         if (state->choleskyError != 0) {
646                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
647                 return;
648         }
649
650         int numItems = itemParam->cols;
651         for (int ix=0; ix < numItems; ix++) {
652                 const double *spec = omxMatrixColumn(itemSpec, ix);
653                 int id = spec[RPF_ISpecID];
654                 const double *rawDesign = omxMatrixColumn(design, ix);
655                 int idesign[design->rows];
656                 int idx = 0;
657                 for (int dx=0; dx < design->rows; dx++) {
658                         if (isfinite(rawDesign[dx])) {
659                                 idesign[idx++] = rawDesign[dx]-1;
660                         } else {
661                                 idesign[idx++] = -1;
662                         }
663                 }
664                 for (int d1=0; d1 < idx; d1++) {
665                         if (idesign[d1] == -1) {
666                                 tmpLatentMean[d1] = 0;
667                         } else {
668                                 tmpLatentMean[d1] = ElatentMean[idesign[d1]];
669                         }
670                         for (int d2=0; d2 <= d1; d2++) {
671                                 int cell = idesign[d2] * maxAbilities + idesign[d1];
672                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
673                                         tmpLatentCov[d2 * maxDims + d1] = d1==d2? 1 : 0;
674                                 } else {
675                                         tmpLatentCov[d2 * maxDims + d1] = ElatentCov[cell];
676                                 }
677                         }
678                 }
679                 if (1) {  // ease debugging, make optional TODO
680                         for (int d1=idx; d1 < maxDims; d1++) tmpLatentMean[d1] = nan("");
681                         for (int d1=0; d1 < maxDims; d1++) {
682                                 for (int d2=0; d2 < maxDims; d2++) {
683                                         if (d1 < idx && d2 < idx) continue;
684                                         tmpLatentCov[d2 * maxDims + d1] = nan("");
685                                 }
686                         }
687                 }
688                 double *iparam = omxMatrixColumn(itemParam, ix);
689                 int *mask = state->paramMap + state->derivPadSize * ix;
690                 rpf_model[id].rescale(spec, iparam, mask, tmpLatentMean, tmpLatentCov);
691         }
692
693         // Copy the adjusted parameters back through OpenMx TODO
694         int numFreeParams = int(oo->freeVarGroup->vars.size());
695         for (int rx=0; rx < itemParam->rows; rx++) {
696                 for (int cx=0; cx < itemParam->cols; cx++) {
697                         int vx = state->paramMap[cx * state->derivPadSize + rx];
698                         if (vx >= 0 && vx < numFreeParams) {
699                                 // TODO fix
700                                 //updateFreeVar(currentState, currentState->freeVarList + vx,
701                                 //                                            omxMatrixElement(itemParam, rx, cx));
702                         }
703                 }
704         }
705 }
706
707 // Attempt G-H grid? http://dbarajassolano.wordpress.com/2012/01/26/on-sparse-grid-quadratures/
708 static void
709 ba81SetupQuadrature(omxExpectation* oo, int gridsize, int flat)
710 {
711         omxBA81State *state = (omxBA81State *) oo->argStruct;
712         int numUnique = state->numUnique;
713         int numThreads = getNumThreads(oo);
714         int maxDims = state->maxDims;
715         int Qwidth = state->Qwidth;
716         int numSpecific = state->numSpecific;
717         int priDims = maxDims - (numSpecific? 1 : 0);
718
719         // try starting small and increasing to the cap TODO
720         state->quadGridSize = gridsize;
721
722         state->totalQuadPoints = 1;
723         for (int dx=0; dx < maxDims; dx++) {
724                 state->totalQuadPoints *= state->quadGridSize;
725         }
726
727         state->totalPrimaryPoints = state->totalQuadPoints;
728
729         if (numSpecific) {
730                 state->totalPrimaryPoints /= state->quadGridSize;
731                 state->speLogQarea = Realloc(state->speLogQarea, state->quadGridSize * gridsize, double);
732         }
733
734         state->Qpoint = Realloc(state->Qpoint, state->quadGridSize, double);
735         state->priLogQarea = Realloc(state->priLogQarea, state->totalPrimaryPoints, double);
736
737         for (int px=0; px < state->quadGridSize; px ++) {
738                 state->Qpoint[px] = Qwidth - px * 2 * Qwidth / (state->quadGridSize-1);
739         }
740
741         if (flat) {
742                 // not sure why this is useful, remove? TODO
743                 double flatd = log(1) - log(state->totalPrimaryPoints);
744                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
745                         state->priLogQarea[qx] = flatd;
746                 }
747                 flatd = log(1) - log(state->quadGridSize);
748                 for (int sx=0; sx < numSpecific; sx++) {
749                         for (int qx=0; qx < state->quadGridSize; qx++) {
750                                 state->speLogQarea[ sx * state->quadGridSize + qx] = flatd;
751                         }
752                 }
753         } else {
754                 double totalArea = 0;
755                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
756                         int quad[priDims];
757                         decodeLocation(qx, priDims, state->quadGridSize, quad);
758                         double where[priDims];
759                         pointToWhere(state, quad, where, priDims);
760                         state->priLogQarea[qx] = dmvnorm(priDims, where,
761                                                          state->latentMeanOut->data,
762                                                          state->latentCovOut->data);
763                         totalArea += exp(state->priLogQarea[qx]);
764                 }
765                 totalArea = log(totalArea);
766                 for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
767                         state->priLogQarea[qx] -= totalArea;
768                         //mxLog("%.5g,", state->priLogQarea[qx]);
769                 }
770                 //mxLog("\n");
771
772                 for (int sx=0; sx < numSpecific; sx++) {
773                         totalArea = 0;
774                         for (int qx=0; qx < state->quadGridSize; qx++) {
775                                 int covCell = (priDims + sx) * state->maxAbilities + priDims + sx;
776                                 double den = dnorm(state->Qpoint[qx],
777                                                    state->latentMeanOut->data[priDims + sx],
778                                                    state->latentCovOut->data[covCell], TRUE);
779                                 state->speLogQarea[sx * state->quadGridSize + qx] = den;
780                                 totalArea += exp(den);
781                         }
782                         totalArea = log(totalArea);
783                         for (int qx=0; qx < state->quadGridSize; qx++) {
784                                 state->speLogQarea[sx * state->quadGridSize + qx] -= totalArea;
785                         }
786                 }
787         }
788
789         if (!state->cacheLXK) {
790                 state->lxk = Realloc(state->lxk, numUnique * numThreads, double);
791         } else {
792                 int ns = state->numSpecific;
793                 if (ns == 0) ns = 1;
794                 long numOrdinate = ns * state->totalQuadPoints;
795                 state->lxk = Realloc(state->lxk, numUnique * numOrdinate, double);
796         }
797
798         state->expected = Realloc(state->expected, state->totalOutcomes * state->totalQuadPoints, double);
799 }
800
801 OMXINLINE static void
802 updateLatentParam(omxExpectation* oo)
803 {
804         omxBA81State *state = (omxBA81State*) oo->argStruct;
805         int maxAbilities = state->maxAbilities;
806         int meanNum = state->latentMeanOut->matrixNumber;
807         int covNum = state->latentCovOut->matrixNumber;
808
809         // TODO need denom for multigroup
810         size_t numFreeParams = oo->freeVarGroup->vars.size();
811         for (size_t px=0; px < numFreeParams; px++) {
812                 omxFreeVar *fv = oo->freeVarGroup->vars[px];
813                 for (size_t lx=0; lx < fv->locations.size(); ++lx) {
814                         omxFreeVarLocation *loc = &fv->locations[lx];
815                         int matNum = ~loc->matrix;
816                         if (matNum == meanNum) {
817                                 int dx = loc->row * loc->col;
818                                 //updateFreeVar(currentState, fv, state->ElatentMean[dx]);
819                         } else if (matNum == covNum) {
820                                 int cell = loc->col * maxAbilities + loc->row;
821                                 //updateFreeVar(currentState, fv, state->ElatentCov[cell]);
822                         }
823                 }
824         }
825
826         ba81SetupQuadrature(oo, state->targetQpoints, 0);
827 }
828
829 OMXINLINE static void
830 expectedUpdate(omxData *data, const int *rowMap, const int px, const int item,
831                const double observed, const int outcomes, double *out)
832 {
833         int pick = omxIntDataElementUnsafe(data, rowMap[px], item);
834         if (pick == NA_INTEGER) {
835                 double slice = exp(observed - log(outcomes));
836                 for (int ox=0; ox < outcomes; ox++) {
837                         out[ox] += slice;
838                 }
839         } else {
840                 out[pick-1] += exp(observed);
841         }
842 }
843
844 OMXINLINE static void
845 ba81Expected(omxExpectation* oo)
846 {
847         omxBA81State *state = (omxBA81State*) oo->argStruct;
848         omxData *data = state->data;
849         int numSpecific = state->numSpecific;
850         const int *rowMap = state->rowMap;
851         double *patternLik = state->patternLik;
852         double *logNumIdentical = state->logNumIdentical;
853         int numUnique = state->numUnique;
854         int maxDims = state->maxDims;
855         int numItems = state->itemParam->cols;
856         omxMatrix *itemSpec = state->itemSpec;
857         int totalOutcomes = state->totalOutcomes;
858
859         OMXZERO(state->expected, totalOutcomes * state->totalQuadPoints);
860
861         if (numSpecific == 0) {
862 #pragma omp parallel for num_threads(Global->numThreads)
863                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
864                         int quad[maxDims];
865                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
866                         double *lxk = ba81LikelihoodFast(oo, 0, quad);
867                         for (int px=0; px < numUnique; px++) {
868                                 double *out = state->expected + qx * totalOutcomes;
869                                 double observed = logNumIdentical[px] + lxk[px] - patternLik[px];
870                                 for (int ix=0; ix < numItems; ix++) {
871                                         const double *spec = omxMatrixColumn(itemSpec, ix);
872                                         int outcomes = spec[RPF_ISpecOutcomes];
873                                         expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
874                                         out += outcomes;
875                                 }
876                         }
877                 }
878         } else {
879                 int sDim = state->maxDims-1;
880                 long specificPoints = state->quadGridSize;
881
882 #pragma omp parallel for num_threads(Global->numThreads)
883                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
884                         int quad[maxDims];
885                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
886
887                         // allSlxk, Slxk only depend on the ordinate of the primary dimensions
888                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
889                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
890                         cai2010(oo, !state->cacheLXK, quad, allSlxk, Slxk);
891
892                         for (long sx=0; sx < specificPoints; sx++) {
893                                 quad[sDim] = sx;
894                                 long qloc = encodeLocation(state->maxDims, state->quadGridSize, quad);
895
896                                 for (int sgroup=0; sgroup < numSpecific; sgroup++) {
897                                         double *eis = Slxk + numUnique * sgroup;
898                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
899
900                                         for (int px=0; px < numUnique; px++) {
901                                                 double *out = state->expected + totalOutcomes * qloc;
902
903                                                 for (int ix=0; ix < numItems; ix++) {
904                                                         const double *spec = omxMatrixColumn(itemSpec, ix);
905                                                         int outcomes = spec[RPF_ISpecOutcomes];
906                                                         if (state->Sgroup[ix] == sgroup) {
907                                                                 double observed = logNumIdentical[px] + (allSlxk[px] - eis[px]) +
908                                                                         (lxk[px] - patternLik[px]);
909                                                                 expectedUpdate(data, rowMap, px, ix, observed, outcomes, out);
910                                                         }
911                                                         out += outcomes;
912                                                 }
913                                         }
914                                 }
915                         }
916                 }
917         }
918         //pda(state->expected, state->totalOutcomes, state->totalQuadPoints);
919 }
920
921 static void
922 ba81Estep(omxExpectation *oo, enum ComputeExpectationContext ctx) {
923         if (ctx == COMPUTE_EXPECT_INITIALIZE) return;
924
925         omxBA81State *state = (omxBA81State*) oo->argStruct;
926         if (state->userEitemParam) {
927                 state->userEitemParam = FALSE; // just use regular MxMatrix with free=FALSE TODO
928         } else {
929                 omxCopyMatrix(state->EitemParam, state->itemParam);  // ComputeAssign ?
930         }
931         ba81Estep1(oo);
932         if (ctx == COMPUTE_EXPECT_PREFIT) {
933                 if (state->rescale) schilling_bock_2005_rescale(oo);
934                 ba81Expected(oo);
935                 // for E-M LL
936         } else {
937                 updateLatentParam(oo);
938                 // for regular LL
939         }
940 }
941
942 OMXINLINE static double
943 ba81Fit1Ordinate(omxExpectation* oo, const int *quad, const double *weight, int want)
944 {
945         omxBA81State *state = (omxBA81State*) oo->argStruct;
946         omxMatrix *itemSpec = state->itemSpec;
947         omxMatrix *itemParam = state->itemParam;
948         int numItems = itemParam->cols;
949         int maxOutcomes = state->maxOutcomes;
950         int maxDims = state->maxDims;
951         double *myDeriv = state->thrDeriv + itemParam->cols * state->derivPadSize * omx_absolute_thread_num();
952         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
953
954         double where[maxDims];
955         pointToWhere(state, quad, where, maxDims);
956
957         double *outcomeProb = computeRPF(oo, itemParam, quad); // avoid malloc/free? TODO
958         if (!outcomeProb) return 0;
959
960         double thr_ll = 0;
961         for (int ix=0; ix < numItems; ix++) {
962                 const double *spec = omxMatrixColumn(itemSpec, ix);
963                 int id = spec[RPF_ISpecID];
964                 int iOutcomes = spec[RPF_ISpecOutcomes];
965
966                 double area = exp(logAreaProduct(state, quad, state->Sgroup[ix]));   // avoid exp() here? TODO
967                 for (int ox=0; ox < iOutcomes; ox++) {
968 #if 0
969 #pragma omp critical(ba81Fit1OrdinateDebug1)
970                         if (!isfinite(outcomeProb[ix * maxOutcomes + ox])) {
971                                 pda(itemParam->data, itemParam->rows, itemParam->cols);
972                                 pda(outcomeProb, outcomes, numItems);
973                                 error("RPF produced NAs");
974                         }
975 #endif
976                         double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
977                         thr_ll += got * area;
978                 }
979
980                 if (do_deriv) {
981                         double *iparam = omxMatrixColumn(itemParam, ix);
982                         double *pad = myDeriv + ix * state->derivPadSize;
983                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
984                 }
985                 weight += iOutcomes;
986         }
987
988         Free(outcomeProb);
989
990         return thr_ll;
991 }
992
993 static double
994 ba81ComputeMFit1(omxExpectation* oo, int want, double *gradient, double *hessian)
995 {
996         omxBA81State *state = (omxBA81State*) oo->argStruct;
997         omxMatrix *customPrior = state->customPrior;
998         omxMatrix *itemParam = state->itemParam;
999         omxMatrix *itemSpec = state->itemSpec;
1000         int maxDims = state->maxDims;
1001         const int totalOutcomes = state->totalOutcomes;
1002
1003         double ll = 0;
1004         if (customPrior) {
1005                 omxRecompute(customPrior);
1006                 ll = customPrior->data[0];
1007         }
1008
1009         if (!isfinite(ll)) {
1010                 omxPrint(itemParam, "item param");
1011                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
1012         }
1013
1014 #pragma omp parallel for num_threads(Global->numThreads)
1015         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1016                 //double area = exp(state->priLogQarea[qx]);  // avoid exp() here? TODO
1017                 int quad[maxDims];
1018                 decodeLocation(qx, maxDims, state->quadGridSize, quad);
1019                 double *weight = state->expected + qx * totalOutcomes;
1020                 double thr_ll = ba81Fit1Ordinate(oo, quad, weight, want);
1021                 
1022 #pragma omp atomic
1023                 ll += thr_ll;
1024         }
1025
1026         if (!customPrior && gradient) {
1027                 double *deriv0 = state->thrDeriv;
1028
1029                 int perThread = itemParam->cols * state->derivPadSize;
1030                 for (int th=1; th < getNumThreads(oo); th++) {
1031                         double *thrD = state->thrDeriv + th * perThread;
1032                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
1033                 }
1034
1035                 int numItems = itemParam->cols;
1036                 for (int ix=0; ix < numItems; ix++) {
1037                         const double *spec = omxMatrixColumn(itemSpec, ix);
1038                         int id = spec[RPF_ISpecID];
1039                         double *iparam = omxMatrixColumn(itemParam, ix);
1040                         double *pad = deriv0 + ix * state->derivPadSize;
1041                         (*rpf_model[id].dLL2)(spec, iparam, pad);
1042                 }
1043
1044                 int numFreeParams = int(oo->freeVarGroup->vars.size());
1045                 int numParams = itemParam->cols * state->derivPadSize;
1046                 for (int ox=0; ox < numParams; ox++) {
1047                         int to = state->paramMap[ox];
1048                         if (to == -1) continue;
1049
1050                         // Need to check because this can happen if
1051                         // lbounds/ubounds are not set appropriately.
1052                         if (0 && !isfinite(deriv0[ox])) {
1053                                 int item = ox / itemParam->rows;
1054                                 mxLog("item parameters:\n");
1055                                 const double *spec = omxMatrixColumn(itemSpec, item);
1056                                 int id = spec[RPF_ISpecID];
1057                                 int numParam = (*rpf_model[id].numParam)(spec);
1058                                 double *iparam = omxMatrixColumn(itemParam, item);
1059                                 pda(iparam, numParam, 1);
1060                                 // Perhaps bounds can be pulled in from librpf? TODO
1061                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
1062                                       ox, item, deriv0[ox]);
1063                         }
1064
1065                         if (to < numFreeParams) {
1066                                 gradient[to] -= deriv0[ox];
1067                         } else {
1068                                 if (hessian) hessian[to - numFreeParams] -= deriv0[ox];
1069                         }
1070                 }
1071         }
1072
1073         return -ll;
1074 }
1075
1076 double
1077 ba81ComputeFit(omxExpectation* oo, int want, double *gradient, double *hessian)
1078 {
1079         if (!want) return 0;
1080
1081         omxBA81State *state = (omxBA81State*) oo->argStruct;
1082
1083         if (want & FF_COMPUTE_FIT) {
1084                 ++state->fitCount;
1085         }
1086
1087         if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
1088                 // M-step
1089                 ++state->gradientCount;
1090
1091                 size_t numFreeParams = oo->freeVarGroup->vars.size();
1092                 OMXZERO(gradient, numFreeParams);
1093                 if (hessian) OMXZERO(hessian, numFreeParams * numFreeParams); // remove if TODO
1094
1095                 omxMatrix *itemParam = state->itemParam;
1096                 OMXZERO(state->thrDeriv, state->derivPadSize * itemParam->cols * getNumThreads(oo));
1097
1098                 double got = ba81ComputeMFit1(oo, want, gradient, hessian);
1099                 state->lastEMLL = got;
1100                 return got;
1101         } else {
1102                 // Major EM iteration, note completely different LL calculation
1103
1104                 double *patternLik = state->patternLik;
1105                 int *numIdentical = state->numIdentical;
1106                 int numUnique = state->numUnique;
1107                 double got = 0;
1108                 for (int ux=0; ux < numUnique; ux++) {
1109                         got += numIdentical[ux] * patternLik[ux];
1110                 }
1111                 return -2 * got;
1112         }
1113 }
1114
1115 static double *
1116 realEAP(omxExpectation *oo)
1117 {
1118         // add openmp parallelization stuff TODO
1119
1120         omxBA81State *state = (omxBA81State *) oo->argStruct;
1121         int numSpecific = state->numSpecific;
1122         int maxDims = state->maxDims;
1123         int priDims = maxDims - (numSpecific? 1 : 0);
1124         int numUnique = state->numUnique;
1125         int maxAbilities = state->maxAbilities;
1126
1127         // TODO Wainer & Thissen. (1987). Estimating ability with the wrong
1128         // model. Journal of Educational Statistics, 12, 339-368.
1129
1130         int numQpoints = state->targetQpoints * 2;  // make configurable TODO
1131
1132         if (numQpoints < 1 + 2.0 * sqrt(state->itemSpec->cols)) {
1133                 // Thissen & Orlando (2001, p. 136)
1134                 warning("EAP requires at least 2*sqrt(items) quadrature points");
1135         }
1136
1137         ba81SetupQuadrature(oo, numQpoints, 0);
1138         ba81Estep1(oo);
1139
1140         /*
1141         double *cov = NULL;
1142         if (maxDims > 1) {
1143                 strcpy(out[2].label, "ability.cov");
1144                 out[2].numValues = -1;
1145                 out[2].rows = maxDims;
1146                 out[2].cols = maxDims;
1147                 out[2].values = (double*) R_alloc(out[2].rows * out[2].cols, sizeof(double));
1148                 cov = out[2].values;
1149                 OMXZERO(cov, out[2].rows * out[2].cols);
1150         }
1151         */
1152
1153         // Need a separate work space because the destination needs
1154         // to be in unsorted order with duplicated rows.
1155         double *ability = Calloc(numUnique * maxAbilities * 2, double);
1156
1157         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1158                 int quad[priDims];
1159                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1160                 double where[priDims];
1161                 pointToWhere(state, quad, where, priDims);
1162                 double logArea = state->priLogQarea[qx];
1163
1164                 double *lxk;
1165                 if (numSpecific == 0) {
1166                         lxk = ba81LikelihoodFast(oo, 0, quad);
1167                 } else {
1168                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
1169                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1170                         cai2010(oo, FALSE, quad, allSlxk, Slxk);
1171                         lxk = allSlxk;
1172                 }
1173
1174                 double *row = ability;
1175                 for (int px=0; px < numUnique; px++) {
1176                         double plik = exp(logArea + lxk[px]);
1177                         for (int dx=0; dx < priDims; dx++) {
1178                                 double piece = where[dx] * plik;
1179                                 row[dx*2] += piece;
1180                                 row[dx*2 + 1] += where[dx] * piece;
1181                                 // ignore cov, for now
1182                         }
1183                         row += 2 * maxAbilities;
1184                 }
1185         }
1186
1187         double *ris = Realloc(NULL, numUnique, double);
1188         for (int sx=0; sx < numSpecific; sx++) {
1189                 for (int sqx=0; sqx < state->quadGridSize; sqx++) {
1190                         double area = exp(state->speLogQarea[sx * state->quadGridSize + sqx]);
1191                         double ptArea = area * state->Qpoint[sqx];
1192                         OMXZERO(ris, numUnique);
1193                         for (int qx=0; qx < state->totalPrimaryPoints; qx++) {
1194                                 int quad[maxDims];
1195                                 decodeLocation(qx, priDims, state->quadGridSize, quad);
1196                                 quad[priDims] = sqx;
1197
1198                                 double *allSlxk = CALC_ALLSLXK(state, numUnique);
1199                                 double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
1200                                 cai2010(oo, FALSE, quad, allSlxk, Slxk);
1201
1202                                 double *eis = Slxk + numUnique * sx;
1203                                 double *lxk = ba81LikelihoodFast(oo, sx, quad);
1204
1205                                 double logArea = state->priLogQarea[qx];
1206                                 for (int px=0; px < numUnique; px++) {
1207                                         ris[px] += exp(logArea + lxk[px] + allSlxk[px] - eis[px]);
1208                                 }
1209                         }
1210                         double *row = ability;
1211                         for (int px=0; px < numUnique; px++) {
1212                                 double piece = ris[px] * ptArea;
1213                                 row[(priDims + sx) * 2] += piece;
1214                                 row[(priDims + sx) * 2 + 1] += piece * state->Qpoint[sqx];
1215                                 row += 2 * maxAbilities;
1216                         }
1217                 }
1218         }
1219         Free(ris);
1220
1221         double *patternLik = state->patternLik;
1222         double *row = ability;
1223         for (int px=0; px < numUnique; px++) {
1224                 double denom = exp(patternLik[px]);
1225                 for (int ax=0; ax < maxAbilities; ax++) {
1226                         row[ax * 2] /= denom;
1227                         row[ax * 2 + 1] /= denom;
1228                         row[ax * 2 + 1] -= row[ax * 2] * row[ax * 2];
1229                 }
1230                 row += 2 * maxAbilities;
1231         }
1232
1233         /*
1234         // make symmetric
1235         for (int d1=0; d1 < maxDims; d1++) {
1236                 for (int d2=0; d2 < d1; d2++) {
1237                         cov[d2 * maxDims + d1] = cov[d1 * maxDims + d2];
1238                 }
1239         }
1240         */
1241
1242         for (int px=0; px < numUnique; px++) {
1243                 double *arow = ability + px * 2 * maxAbilities;
1244                 for (int dx=0; dx < maxAbilities; dx++) {
1245                         arow[dx*2+1] = sqrt(arow[dx*2+1]);
1246                 }
1247         }
1248
1249         return ability;
1250 }
1251
1252 /**
1253  * MAP is not affected by the number of items. EAP is. Likelihood can
1254  * get concentrated in a single quadrature ordinate. For 3PL, response
1255  * patterns can have a bimodal likelihood. This will confuse MAP and
1256  * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
1257  *
1258  * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
1259  * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
1260  * (pp 73-140). Lawrence Erlbaum Associates, Inc.
1261  */
1262 omxRListElement *
1263 ba81EAP(omxExpectation *oo, int *numReturns)   // rename to "return stuff to user"
1264 {
1265         omxBA81State *state = (omxBA81State *) oo->argStruct;
1266         int maxAbilities = state->maxAbilities;
1267
1268         *numReturns = 1 + (state->scores != SCORES_OMIT);
1269         omxRListElement *out = (omxRListElement*) R_alloc(*numReturns, sizeof(omxRListElement));
1270         int ox=0;
1271
1272         out[ox].numValues = 1;
1273         strcpy(out[ox].label, "EM.LL");
1274         out[ox].values = &state->lastEMLL;
1275         ++ox;
1276
1277         if (state->scores != SCORES_OMIT) {
1278                 double *ability = realEAP(oo);
1279                 int numUnique = state->numUnique;
1280                 omxData *data = state->data;
1281
1282                 int cols = state->scores == SCORES_FULL? data->rows : numUnique;
1283
1284                 strcpy(out[ox].label, "ability");
1285                 out[ox].numValues = -1;
1286                 out[ox].rows = 2 * maxAbilities;
1287                 out[ox].cols = cols;
1288                 out[ox].values = (double*) R_alloc(out[ox].rows * out[ox].cols, sizeof(double));
1289
1290                 if (state->scores == SCORES_FULL) {
1291                         for (int rx=0; rx < numUnique; rx++) {
1292                                 double *pa = ability + rx * 2 * maxAbilities;
1293
1294                                 int dups = omxDataNumIdenticalRows(state->data, state->rowMap[rx]);
1295                                 for (int dup=0; dup < dups; dup++) {
1296                                         int dest = omxDataIndex(data, state->rowMap[rx]+dup);
1297                                         memcpy(out[ox].values + dest * out[ox].rows, pa, sizeof(double) * 2 * maxAbilities);
1298                                 }
1299                         }
1300                 } else {
1301                         memcpy(out[ox].values, ability, sizeof(double) * numUnique * 2 * maxAbilities);
1302                 }
1303                 Free(ability);
1304                 ++ox;
1305         }
1306
1307         for (int ix=0; ix < state->itemParam->cols; ix++) {
1308                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1309                 int id = spec[RPF_ISpecID];
1310                 double *param = omxMatrixColumn(state->itemParam, ix);
1311                 rpf_model[id].postfit(spec, param);
1312         }
1313
1314         return out;
1315 }
1316
1317 static void ba81Destroy(omxExpectation *oo) {
1318         if(OMX_DEBUG) {
1319                 mxLog("Freeing %s function.\n", NAME);
1320         }
1321         omxBA81State *state = (omxBA81State *) oo->argStruct;
1322         //mxLog("fit %d gradient %d\n", state->fitCount, state->gradientCount);
1323         omxFreeAllMatrixData(state->itemSpec);
1324         omxFreeAllMatrixData(state->itemParam);
1325         omxFreeAllMatrixData(state->EitemParam);
1326         omxFreeAllMatrixData(state->design);
1327         omxFreeAllMatrixData(state->customPrior);
1328         omxFreeAllMatrixData(state->latentMeanOut);
1329         omxFreeAllMatrixData(state->latentCovOut);
1330         Free(state->logNumIdentical);
1331         Free(state->numIdentical);
1332         Free(state->Qpoint);
1333         Free(state->priLogQarea);
1334         Free(state->rowMap);
1335         Free(state->patternLik);
1336         Free(state->lxk);
1337         Free(state->Slxk);
1338         Free(state->allSlxk);
1339         Free(state->Sgroup);
1340         Free(state->expected);
1341         Free(state->paramMap);
1342         Free(state->thrDeriv);
1343         Free(state->ElatentMean);
1344         Free(state->ElatentCov);
1345         Free(state->tmpLatentMean);
1346         Free(state->tmpLatentCov);
1347         Free(state);
1348 }
1349
1350 void getMatrixDims(SEXP r_theta, int *rows, int *cols)
1351 {
1352     SEXP matrixDims;
1353     PROTECT(matrixDims = getAttrib(r_theta, R_DimSymbol));
1354     int *dimList = INTEGER(matrixDims);
1355     *rows = dimList[0];
1356     *cols = dimList[1];
1357     UNPROTECT(1);
1358 }
1359
1360 void omxInitExpectationBA81(omxExpectation* oo) {
1361         omxState* currentState = oo->currentState;      
1362         SEXP rObj = oo->rObj;
1363         SEXP tmp;
1364         
1365         if(OMX_DEBUG) {
1366                 mxLog("Initializing %s.\n", NAME);
1367         }
1368         if (!rpf_model) {
1369                 if (0) {
1370                         const int wantVersion = 3;
1371                         int version;
1372                         get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
1373                         (*get_librpf)(&version, &rpf_numModels, &rpf_model);
1374                         if (version < wantVersion) error("librpf binary API %d installed, at least %d is required",
1375                                                          version, wantVersion);
1376                 } else {
1377                         rpf_numModels = librpf_numModels;
1378                         rpf_model = librpf_model;
1379                 }
1380         }
1381         
1382         omxBA81State *state = Calloc(1, omxBA81State);
1383         oo->argStruct = (void*) state;
1384
1385         PROTECT(tmp = GET_SLOT(rObj, install("data")));
1386         state->data = omxDataLookupFromState(tmp, currentState);
1387
1388         if (strcmp(omxDataType(state->data), "raw") != 0) {
1389                 omxRaiseErrorf(currentState, "%s unable to handle data type %s", NAME, omxDataType(state->data));
1390                 return;
1391         }
1392
1393         state->itemSpec =
1394                 omxNewMatrixFromSlot(rObj, currentState, "ItemSpec");
1395         state->design =
1396                 omxNewMatrixFromSlot(rObj, currentState, "Design");
1397         state->itemParam =
1398                 omxNewMatrixFromSlot(rObj, currentState, "ItemParam");
1399         state->latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean");
1400         if (!state->latentMeanOut) error("Failed to retrieve mean matrix");
1401         state->latentCovOut  = omxNewMatrixFromSlot(rObj, currentState, "cov");
1402         if (!state->latentCovOut) error("Failed to retrieve cov matrix");
1403
1404         state->EitemParam =
1405                 omxNewMatrixFromSlot(rObj, currentState, "EItemParam");
1406         if (state->EitemParam) {
1407                 state->userEitemParam = TRUE;
1408         } else {
1409                 state->EitemParam =
1410                         omxInitTemporaryMatrix(NULL, state->itemParam->rows, state->itemParam->cols,
1411                                                TRUE, currentState);
1412         }
1413
1414         state->customPrior =
1415                 omxNewMatrixFromSlot(rObj, currentState, "CustomPrior");
1416         
1417         oo->computeFun = ba81Estep;
1418         oo->destructFun = ba81Destroy;
1419         
1420         // TODO: Exactly identical rows do not contribute any information.
1421         // The sorting algorithm ought to remove them so we don't waste RAM.
1422         // The following summary stats would be cheaper to calculate too.
1423
1424         int numUnique = 0;
1425         omxData *data = state->data;
1426         if (omxDataNumFactor(data) != data->cols) {
1427                 // verify they are ordered factors TODO
1428                 omxRaiseErrorf(currentState, "%s: all columns must be factors", NAME);
1429                 return;
1430         }
1431
1432         for (int rx=0; rx < data->rows;) {
1433                 rx += omxDataNumIdenticalRows(state->data, rx);
1434                 ++numUnique;
1435         }
1436         state->numUnique = numUnique;
1437
1438         state->rowMap = Realloc(NULL, numUnique, int);
1439         state->numIdentical = Realloc(NULL, numUnique, int);
1440         state->logNumIdentical = Realloc(NULL, numUnique, double);
1441
1442         int numItems = state->itemParam->cols;
1443         if (data->cols != numItems) {
1444                 error("Data has %d columns for %d items", data->cols, numItems);
1445         }
1446
1447         for (int rx=0, ux=0; rx < data->rows; ux++) {
1448                 if (rx == 0) {
1449                         // all NA rows will sort to the top
1450                         int na=0;
1451                         for (int ix=0; ix < numItems; ix++) {
1452                                 if (omxIntDataElement(data, 0, ix) == NA_INTEGER) { ++na; }
1453                         }
1454                         if (na == numItems) {
1455                                 omxRaiseErrorf(currentState, "Remove rows with all NAs");
1456                                 return;
1457                         }
1458                 }
1459                 int dups = omxDataNumIdenticalRows(state->data, rx);
1460                 state->numIdentical[ux] = dups;
1461                 state->logNumIdentical[ux] = log(dups);
1462                 state->rowMap[ux] = rx;
1463                 rx += dups;
1464         }
1465
1466         state->patternLik = Realloc(NULL, numUnique, double);
1467
1468         int numThreads = getNumThreads(oo);
1469
1470         int maxSpec = 0;
1471         int maxParam = 0;
1472         state->maxDims = 0;
1473         state->maxOutcomes = 0;
1474
1475         for (int ix=0; ix < numItems; ix++) {
1476                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1477                 int id = spec[RPF_ISpecID];
1478                 if (id < 0 || id >= rpf_numModels) {
1479                         omxRaiseErrorf(currentState, "ItemSpec column %d has unknown item model %d", ix, id);
1480                         return;
1481                 }
1482
1483                 double *param = omxMatrixColumn(state->itemParam, ix);
1484                 rpf_model[id].prefit(spec, param);
1485         }
1486
1487         int totalOutcomes = 0;
1488         for (int cx = 0; cx < data->cols; cx++) {
1489                 const double *spec = omxMatrixColumn(state->itemSpec, cx);
1490                 int id = spec[RPF_ISpecID];
1491                 int dims = spec[RPF_ISpecDims];
1492                 if (state->maxDims < dims)
1493                         state->maxDims = dims;
1494
1495                 int no = spec[RPF_ISpecOutcomes];
1496                 totalOutcomes += no;
1497                 if (state->maxOutcomes < no)
1498                         state->maxOutcomes = no;
1499
1500                 // TODO this summary stat should be available from omxData
1501                 int dataMax=0;
1502                 for (int rx=0; rx < data->rows; rx++) {
1503                         int pick = omxIntDataElementUnsafe(data, rx, cx);
1504                         if (dataMax < pick)
1505                                 dataMax = pick;
1506                 }
1507                 if (dataMax > no) {
1508                         error("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
1509                 } else if (dataMax < no) {
1510                         warning("Data for item %d has only %d outcomes, not %d", cx+1, dataMax, no);
1511                         // promote to error?
1512                         // should complain if an outcome is not represented in the data TODO
1513                 }
1514
1515                 int numSpec = (*rpf_model[id].numSpec)(spec);
1516                 if (maxSpec < numSpec)
1517                         maxSpec = numSpec;
1518
1519                 int numParam = (*rpf_model[id].numParam)(spec);
1520                 if (maxParam < numParam)
1521                         maxParam = numParam;
1522         }
1523
1524         state->totalOutcomes = totalOutcomes;
1525
1526         if (state->itemSpec->cols != data->cols || state->itemSpec->rows != maxSpec) {
1527                 omxRaiseErrorf(currentState, "ItemSpec must have %d item columns and %d rows",
1528                                data->cols, maxSpec);
1529                 return;
1530         }
1531         if (state->itemParam->rows != maxParam) {
1532                 omxRaiseErrorf(currentState, "ItemParam should have %d rows", maxParam);
1533                 return;
1534         }
1535
1536         state->derivPadSize = maxParam + maxParam*(1+maxParam)/2;
1537
1538         if (state->design == NULL) {
1539                 state->maxAbilities = state->maxDims;
1540                 state->design = omxInitTemporaryMatrix(NULL, state->maxDims, numItems,
1541                                        TRUE, currentState);
1542                 for (int ix=0; ix < numItems; ix++) {
1543                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1544                         int dims = spec[RPF_ISpecDims];
1545                         for (int dx=0; dx < state->maxDims; dx++) {
1546                                 omxSetMatrixElement(state->design, dx, ix, dx < dims? (double)dx+1 : nan(""));
1547                         }
1548                 }
1549         } else {
1550                 omxMatrix *design = state->design;
1551                 if (design->cols != numItems ||
1552                     design->rows != state->maxDims) {
1553                         omxRaiseErrorf(currentState, "Design matrix should have %d rows and %d columns",
1554                                        state->maxDims, numItems);
1555                         return;
1556                 }
1557
1558                 state->maxAbilities = 0;
1559                 for (int ix=0; ix < design->rows * design->cols; ix++) {
1560                         double got = design->data[ix];
1561                         if (!R_FINITE(got)) continue;
1562                         if (round(got) != (int)got) error("Design matrix can only contain integers"); // TODO better way?
1563                         if (state->maxAbilities < got)
1564                                 state->maxAbilities = got;
1565                 }
1566                 for (int ix=0; ix < design->cols; ix++) {
1567                         const double *idesign = omxMatrixColumn(design, ix);
1568                         int ddim = 0;
1569                         for (int rx=0; rx < design->rows; rx++) {
1570                                 if (isfinite(idesign[rx])) ddim += 1;
1571                         }
1572                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1573                         int dims = spec[RPF_ISpecDims];
1574                         if (ddim > dims) error("Item %d has %d dims but design assigns %d", ix, dims, ddim);
1575                 }
1576         }
1577         if (state->maxAbilities <= state->maxDims) {
1578                 state->Sgroup = Calloc(numItems, int);
1579         } else {
1580                 // Not sure if this is correct, revisit TODO
1581                 int Sgroup0 = -1;
1582                 state->Sgroup = Realloc(NULL, numItems, int);
1583                 for (int dx=0; dx < state->maxDims; dx++) {
1584                         for (int ix=0; ix < numItems; ix++) {
1585                                 int ability = omxMatrixElement(state->design, dx, ix);
1586                                 if (dx < state->maxDims - 1) {
1587                                         if (Sgroup0 <= ability)
1588                                                 Sgroup0 = ability+1;
1589                                         continue;
1590                                 }
1591                                 int ss=-1;
1592                                 if (ability >= Sgroup0) {
1593                                         if (ss == -1) {
1594                                                 ss = ability;
1595                                         } else {
1596                                                 omxRaiseErrorf(currentState, "Item %d cannot belong to more than "
1597                                                                "1 specific dimension (both %d and %d)",
1598                                                                ix, ss, ability);
1599                                                 return;
1600                                         }
1601                                 }
1602                                 if (ss == -1) ss = Sgroup0;
1603                                 state->Sgroup[ix] = ss - Sgroup0;
1604                         }
1605                 }
1606                 state->numSpecific = state->maxAbilities - state->maxDims + 1;
1607                 state->allSlxk = Realloc(NULL, numUnique * numThreads, double);
1608                 state->Slxk = Realloc(NULL, numUnique * state->numSpecific * numThreads, double);
1609         }
1610
1611         if (state->latentMeanOut->rows * state->latentMeanOut->cols != state->maxAbilities) {
1612                 error("The mean matrix '%s' must be 1x%d or %dx1", state->latentMeanOut->name,
1613                       state->maxAbilities, state->maxAbilities);
1614         }
1615         if (state->latentCovOut->rows != state->maxAbilities ||
1616             state->latentCovOut->cols != state->maxAbilities) {
1617                 error("The cov matrix '%s' must be %dx%d",
1618                       state->latentCovOut->name, state->maxAbilities, state->maxAbilities);
1619         }
1620
1621         PROTECT(tmp = GET_SLOT(rObj, install("cache")));
1622         state->cacheLXK = asLogical(tmp);
1623
1624         PROTECT(tmp = GET_SLOT(rObj, install("rescale")));
1625         state->rescale = asLogical(tmp);
1626
1627         PROTECT(tmp = GET_SLOT(rObj, install("qpoints")));
1628         state->targetQpoints = asReal(tmp);
1629
1630         PROTECT(tmp = GET_SLOT(rObj, install("qwidth")));
1631         state->Qwidth = asReal(tmp);
1632
1633         PROTECT(tmp = GET_SLOT(rObj, install("scores")));
1634         const char *score_option = CHAR(asChar(tmp));
1635         if (strcmp(score_option, "omit")==0) state->scores = SCORES_OMIT;
1636         if (strcmp(score_option, "unique")==0) state->scores = SCORES_UNIQUE;
1637         if (strcmp(score_option, "full")==0) state->scores = SCORES_FULL;
1638
1639         state->ElatentMean = Realloc(NULL, state->maxAbilities * numUnique, double);
1640         state->ElatentCov = Realloc(NULL, state->maxAbilities * state->maxAbilities * numUnique, double);
1641         state->tmpLatentMean = Realloc(NULL, state->maxDims, double);
1642         state->tmpLatentCov = Realloc(NULL, state->maxDims * state->maxDims, double);
1643
1644         ba81SetupQuadrature(oo, state->targetQpoints, 0);
1645
1646         buildParamMap(oo);
1647
1648         // verify data bounded between 1 and numOutcomes TODO
1649         // hm, looks like something could be added to omxData for column summary stats?
1650 }