ifa: Add commas to debugging tool
[openmx:openmx.git] / src / omxExpectationBA81.c
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 // Consider replacing log() with log2() in some places? Not worth it?
19
20 #include "omxExpectation.h"
21 #include "omxOpenmpWrap.h"
22 #include "npsolWrap.h"
23 #include "libirt-rpf.h"
24 #include "omxOptimizer.h"  // remove TODO
25
26 static const char *NAME = "ExpectationBA81";
27
28 static const struct rpf *rpf_model = NULL;
29 static int rpf_numModels;
30
31 typedef struct {
32
33         // data characteristics
34         omxData *data;
35         int numUnique;
36         int *numIdentical;        // length numUnique
37         double *logNumIdentical;  // length numUnique
38         int *rowMap;              // length numUnique
39
40         // item description related
41         omxMatrix *itemSpec;
42         int maxOutcomes;
43         int maxDims;
44         int maxAbilities;
45         int numSpecific;
46         int *Sgroup;              // item's specific group 0..numSpecific-1
47         omxMatrix *design;        // items * maxDims
48
49         // quadrature related
50         int numQpoints;
51         double *Qpoint;
52         double *Qarea;
53         double *logQarea;
54         long *quadGridSize;       // maxDims
55         long totalPrimaryPoints;  // product of quadGridSize except specific dim
56         long totalQuadPoints;     // product of quadGridSize
57
58         // estimation related
59         omxMatrix *EitemParam;    // E step version
60         int userEitemParam;
61         omxMatrix *itemParam;     // M step version
62         omxMatrix *customPrior;   // TODO remove?
63         int derivPadSize;         // maxParam + maxParam*(1+maxParam)/2
64         double *thrDeriv;         // itemParam->cols * derivPadSize * thread
65         int *paramMap;            // itemParam->cols * derivPadSize -> index of free parameter
66         int cacheLXK;             // w/cache,  numUnique * #specific quad points * totalQuadPoints
67         double *lxk;              // wo/cache, numUnique * thread
68         double *allSlxk;          // numUnique * thread
69         double *Slxk;             // numUnique * #specific dimensions * thread
70         double *patternLik;       // numUnique
71         // for multi-group, need to know the reference group TODO
72         double *latentMean;       // maxAbilities * numUnique
73         double *latentCov;        // maxAbilities * maxAbilities * numUnique ; only lower triangle is used
74         double *latentMean1;      // maxDims
75         double *latentCov1;       // maxDims * maxDims ; only lower triangle is used
76         double *latentMeanOut;    // maxAbilities
77         double *latentCovOut;     // maxAbilities * maxAbilities ; only lower triangle is used
78         int doRescale;
79         int choleskyError;
80
81         int converged;
82         int gradientCount;
83         int fitCount;
84 } omxBA81State;
85
86 static void
87 pda(const double *ar, int rows, int cols) {   // column major order
88         for (int rx=0; rx < rows; rx++) {
89                 for (int cx=0; cx < cols; cx++) {
90                         Rprintf("%.6g, ", ar[cx * rows + rx]);
91                 }
92                 Rprintf("\n");
93         }
94 }
95
96 static void
97 pia(const int *ar, int rows, int cols) {   // column major order
98         for (int rx=0; rx < rows; rx++) {
99                 for (int cx=0; cx < cols; cx++) {
100                         Rprintf("%d, ", ar[cx * rows + rx]);
101                 }
102                 Rprintf("\n");
103         }
104 }
105
106 static int
107 getNumThreads(omxExpectation* oo)
108 {
109         int numThreads = oo->currentState->numThreads;
110         if (numThreads < 1) numThreads = 1;
111         return numThreads;
112 }
113
114 static void buildParamMap(omxExpectation* oo)
115 {
116         omxState* currentState = oo->currentState;
117         omxBA81State *state = (omxBA81State *) oo->argStruct;
118         omxMatrix *itemParam = state->itemParam;
119         int size = itemParam->cols * state->derivPadSize;
120
121         state->paramMap = Realloc(NULL, size, int);
122         for (int px=0; px < size; px++) {
123                 state->paramMap[px] = -1;
124         }
125
126         int numFreeParams = currentState->numFreeParams;
127         int *pRow = Realloc(NULL, numFreeParams, int);
128         int *pCol = Realloc(NULL, numFreeParams, int);
129
130         for (int px=0; px < numFreeParams; px++) {
131                 omxFreeVar *fv = currentState->freeVarList + px;
132                 for (int lx=0; lx < fv->numLocations; lx++) {
133                         if (~fv->matrices[lx] == itemParam->matrixNumber) {
134                                 pRow[px] = fv->row[lx];
135                                 pCol[px] = fv->col[lx];
136                                 int at = pCol[px] * state->derivPadSize + pRow[px];
137                                 state->paramMap[at] = px;
138                         }
139                 }
140         }
141
142         for (int p1=0; p1 < numFreeParams; p1++) {
143                 for (int p2=p1; p2 < numFreeParams; p2++) {
144                         if (pCol[p1] != pCol[p2]) continue;
145                         const double *spec = omxMatrixColumn(state->itemSpec, pCol[p1]);
146                         int id = spec[RPF_ISpecID];
147                         int numParam = (*rpf_model[id].numParam)(spec);
148                         int r1 = pRow[p1];
149                         int r2 = pRow[p2];
150                         if (r1 > r2) { int tmp=r1; r1=r2; r2=tmp; }
151                         int rowOffset = 0;
152                         for (int rx=1; rx <= r2; rx++) rowOffset += rx;
153                         int at = pCol[p1] * state->derivPadSize + numParam + rowOffset + r1;
154                         state->paramMap[at] = numFreeParams + p1 * numFreeParams + p2;
155                 }
156         }
157
158         Free(pRow);
159         Free(pCol);
160
161         state->thrDeriv = Realloc(NULL, itemParam->cols * state->derivPadSize * getNumThreads(oo), double);
162 }
163
164 OMXINLINE static void
165 pointToWhere(omxBA81State *state, const int *quad, double *where, int upto)
166 {
167         for (int dx=0; dx < upto; dx++) {
168                 where[dx] = state->Qpoint[quad[dx]];
169         }
170 }
171
172 OMXINLINE static void
173 assignDims(omxMatrix *itemSpec, omxMatrix *design, int dims, int maxDims, int ix,
174            const double *restrict theta, double *restrict ptheta)
175 {
176         for (int dx=0; dx < dims; dx++) {
177                 int ability = (int)omxMatrixElement(design, dx, ix) - 1;
178                 if (ability >= maxDims) ability = maxDims-1;
179                 ptheta[dx] = theta[ability];
180         }
181 }
182
183 /**
184  * \param theta Vector of ability parameters, one per ability
185  * \returns A numItems by maxOutcomes colMajor vector of doubles. Caller must Free it.
186  */
187 static double *
188 computeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
189 {
190         omxBA81State *state = (omxBA81State*) oo->argStruct;
191         omxMatrix *itemSpec = state->itemSpec;
192         int numItems = itemSpec->cols;
193         omxMatrix *design = state->design;
194         int maxDims = state->maxDims;
195
196         double theta[maxDims];
197         pointToWhere(state, quad, theta, maxDims);
198
199         double *outcomeProb = Realloc(NULL, numItems * state->maxOutcomes, double);
200         //double *outcomeProb = Calloc(numItems * state->maxOutcomes, double);
201
202         for (int ix=0; ix < numItems; ix++) {
203                 const double *spec = omxMatrixColumn(itemSpec, ix);
204                 double *iparam = omxMatrixColumn(itemParam, ix);
205                 double *out = outcomeProb + ix * state->maxOutcomes;
206                 int id = spec[RPF_ISpecID];
207                 int dims = spec[RPF_ISpecDims];
208                 double ptheta[dims];
209                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta);
210                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
211 #if 0
212                 for (int ox=0; ox < spec[RPF_ISpecOutcomes]; ox++) {
213                         if (!isfinite(out[ox]) || out[ox] > 0) {
214                                 Rprintf("spec\n");
215                                 pda(spec, itemSpec->rows, 1);
216                                 Rprintf("item param\n");
217                                 pda(iparam, itemParam->rows, 1);
218                                 Rprintf("where\n");
219                                 pda(ptheta, dims, 1);
220                                 error("RPF returned %20.20f", out[ox]);
221                         }
222                 }
223 #endif
224         }
225
226         return outcomeProb;
227 }
228
229 OMXINLINE static long
230 encodeLocation(const int dims, const long *restrict grid, const int *restrict quad)
231 {
232         long qx = 0;
233         for (int dx=dims-1; dx >= 0; dx--) {
234                 qx = qx * grid[dx];
235                 qx += quad[dx];
236         }
237         return qx;
238 }
239
240 #define CALC_LXK_CACHED(state, numUnique, quad, tqp, specific) \
241         ((state)->lxk + \
242          (numUnique) * encodeLocation((state)->maxDims, (state)->quadGridSize, quad) + \
243          (numUnique) * (tqp) * (specific))
244
245 OMXINLINE static double *
246 ba81Likelihood(omxExpectation *oo, int specific, const int *restrict quad)
247 {
248         omxBA81State *state = (omxBA81State*) oo->argStruct;
249         int numUnique = state->numUnique;
250         int maxOutcomes = state->maxOutcomes;
251         omxData *data = state->data;
252         int numItems = state->itemSpec->cols;
253         int *restrict Sgroup = state->Sgroup;
254         double *restrict lxk;
255
256         if (!state->cacheLXK) {
257                 lxk = state->lxk + numUnique * omx_absolute_thread_num();
258         } else {
259                 lxk = CALC_LXK_CACHED(state, numUnique, quad, state->totalQuadPoints, specific);
260         }
261
262         const double *outcomeProb = computeRPF(oo, state->EitemParam, quad);
263         if (!outcomeProb) {
264                 OMXZERO(lxk, numUnique);
265                 return lxk;
266         }
267
268         const int *rowMap = state->rowMap;
269         for (int px=0; px < numUnique; px++) {
270                 double lxk1 = 0;
271                 for (int ix=0; ix < numItems; ix++) {
272                         if (specific != Sgroup[ix]) continue;
273                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
274                         if (pick == NA_INTEGER) continue;
275                         double piece = outcomeProb[ix * maxOutcomes + pick-1];  // move -1 elsewhere TODO
276                         lxk1 += piece;
277                 }
278 #if 0
279 #pragma omp critical(ba81LikelihoodDebug1)
280                 if (!isfinite(lxk1) || lxk1 > numItems) {
281                         Rprintf("where\n");
282                         double where[state->maxDims];
283                         pointToWhere(state, quad, where, state->maxDims);
284                         pda(where, state->maxDims, 1);
285                         Rprintf("prob\n");
286                         pda(outcomeProb, numItems, maxOutcomes);
287                         error("Likelihood of row %d is %f", rowMap[px], lxk1);
288                 }
289 #endif
290                 lxk[px] = lxk1;
291         }
292
293         Free(outcomeProb);
294
295         return lxk;
296 }
297
298 OMXINLINE static double *
299 ba81LikelihoodFast(omxExpectation *oo, int specific, const int *restrict quad)
300 {
301         omxBA81State *state = (omxBA81State*) oo->argStruct;
302         if (!state->cacheLXK) {
303                 return ba81LikelihoodFast(oo, specific, quad);
304         } else {
305                 return CALC_LXK_CACHED(state, state->numUnique, quad, state->totalQuadPoints, specific);
306         }
307
308 }
309
310 OMXINLINE static void
311 mapLatentSpace(omxBA81State *state, int px, int sgroup, double piece, const double *where)
312 {
313         double *latentMean = state->latentMean;
314         double *latentCov = state->latentCov;
315         int maxDims = state->maxDims;
316         int maxAbilities = state->maxAbilities;
317         int pmax = maxDims;
318         if (state->numSpecific) pmax -= 1;
319
320         if (sgroup == 0) {
321                 for (int d1=0; d1 < pmax; d1++) {
322                         double piece_w1 = piece * where[d1];
323                         int mloc = px * maxAbilities + d1;
324 #pragma omp atomic
325                         latentMean[mloc] += piece_w1;
326                         for (int d2=0; d2 <= d1; d2++) {
327                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
328                                 double piece_cov = piece_w1 * where[d2];
329 #pragma omp atomic
330                                 latentCov[loc] += piece_cov;
331                         }
332                 }
333         }
334
335         if (state->numSpecific) {
336                 int sdim = maxDims + sgroup - 1;
337
338                 double piece_w1 = piece * where[maxDims-1];
339                 int mloc = px * maxAbilities + sdim;
340 #pragma omp atomic
341                 latentMean[mloc] += piece_w1;
342
343                 int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
344                 double piece_var = piece_w1 * where[maxDims-1];
345 #pragma omp atomic
346                 latentCov[loc] += piece_var;
347         }
348 }
349
350 OMXINLINE static double
351 logAreaProduct(omxBA81State *state, const int *restrict quad, const int upto)
352 {
353         double logArea = 0;
354         for (int dx=0; dx < upto; dx++) {
355                 logArea += state->logQarea[quad[dx]];
356         }
357         return logArea;
358 }
359
360 #define CALC_ALLSLXK(state, numUnique) \
361         (state->allSlxk + omx_absolute_thread_num() * (numUnique))
362
363 #define CALC_SLXK(state, numUnique, numSpecific) \
364         (state->Slxk + omx_absolute_thread_num() * (numUnique) * (numSpecific))
365
366 OMXINLINE static void
367 cai2010(omxExpectation* oo, int recompute, const int *restrict primaryQuad,
368         double *restrict allSlxk, double *restrict Slxk)
369 {
370         omxBA81State *state = (omxBA81State*) oo->argStruct;
371         int numUnique = state->numUnique;
372         int numSpecific = state->numSpecific;
373         int maxDims = state->maxDims;
374         int sDim = maxDims-1;
375
376         int quad[maxDims];
377         memcpy(quad, primaryQuad, sizeof(int)*sDim);
378
379         OMXZERO(Slxk, numUnique * numSpecific);
380         OMXZERO(allSlxk, numUnique);
381
382         for (int sx=0; sx < numSpecific; sx++) {
383                 double *eis = Slxk + numUnique * sx;
384                 int quadGridSize = state->quadGridSize[sDim];
385
386                 for (int qx=0; qx < quadGridSize; qx++) {
387                         quad[sDim] = qx;
388                         double where[maxDims];
389                         pointToWhere(state, quad, where, maxDims);
390
391                         double *lxk;
392                         if (recompute) {
393                                 lxk = ba81Likelihood(oo, sx, quad);
394                         } else {
395                                 lxk = CALC_LXK_CACHED(state, numUnique, quad, state->totalQuadPoints, sx);
396                         }
397
398                         for (int ix=0; ix < numUnique; ix++) {
399                                 eis[ix] += exp(lxk[ix] + state->logQarea[qx]);
400                         }
401                 }
402
403                 for (int px=0; px < numUnique; px++) {
404                         eis[px] = log(eis[px]);
405                         allSlxk[px] += eis[px];
406                 }
407         }
408 }
409
410 OMXINLINE static void
411 decodeLocation(long qx, const int dims, const long *restrict grid,
412                int *restrict quad)
413 {
414         for (int dx=0; dx < dims; dx++) {
415                 quad[dx] = qx % grid[dx];
416                 qx = qx / grid[dx];
417         }
418 }
419
420 static void
421 ba81Estep1(omxExpectation *oo) {
422         if(OMX_DEBUG_MML) {Rprintf("Beginning %s Computation.\n", NAME);}
423
424         omxBA81State *state = (omxBA81State*) oo->argStruct;
425         double *patternLik = state->patternLik;
426         int numUnique = state->numUnique;
427         int numSpecific = state->numSpecific;
428         double *latentMean = state->latentMean;
429         double *latentCov = state->latentCov;
430         int maxDims = state->maxDims;
431         int maxAbilities = state->maxAbilities;
432         int primaryDims = maxDims;
433
434         OMXZERO(patternLik, numUnique);
435         OMXZERO(latentMean, numUnique * maxAbilities);
436         OMXZERO(latentCov, numUnique * maxAbilities * maxAbilities);
437
438         // E-step, marginalize person ability
439         //
440         // Note: In the notation of Bock & Aitkin (1981) and
441         // Cai~(2010), these loops are reversed.  That is, the inner
442         // loop is over quadrature points and the outer loop is over
443         // all response patterns.
444         //
445         if (numSpecific == 0) {
446 #pragma omp parallel for num_threads(oo->currentState->numThreads)
447                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
448                         int quad[maxDims];
449                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
450                         double where[maxDims];
451                         pointToWhere(state, quad, where, maxDims);
452
453                         double *lxk = ba81Likelihood(oo, 0, quad);
454
455                         double logArea = logAreaProduct(state, quad, maxDims);
456 #pragma omp critical(EstepUpdate)
457                         for (int px=0; px < numUnique; px++) {
458                                 double tmp = exp(lxk[px] + logArea);
459 #if 0
460                                 if (!isfinite(tmp)) {
461                                         Rprintf("where\n");
462                                         pda(where, maxDims, 1);
463                                         error("Row %d lxk %f logArea %f tmp %f",
464                                               state->rowMap[px], lxk[px], logArea, tmp);
465                                 }
466 #endif
467                                 patternLik[px] += tmp;
468                                 mapLatentSpace(state, px, 0, tmp, where);
469                         }
470                 }
471         } else {
472                 primaryDims -= 1;
473                 int sDim = primaryDims;
474                 long specificPoints = state->quadGridSize[sDim];
475
476 #pragma omp parallel for num_threads(oo->currentState->numThreads)
477                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
478                         int quad[maxDims];
479                         decodeLocation(qx, primaryDims, state->quadGridSize, quad);
480
481                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
482                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
483                         cai2010(oo, TRUE, quad, allSlxk, Slxk);
484
485                         for (int sgroup=0; sgroup < numSpecific; sgroup++) {
486                                 double *eis = Slxk + numUnique * sgroup;
487                                 for (long sx=0; sx < specificPoints; sx++) {
488                                         quad[sDim] = sx;
489                                         double where[maxDims];
490                                         pointToWhere(state, quad, where, maxDims);
491                                         double logArea = logAreaProduct(state, quad, maxDims);
492                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
493                                         for (int px=0; px < numUnique; px++) {
494                                                 double tmp = exp((allSlxk[px] - eis[px]) + lxk[px] + logArea);
495                                                 mapLatentSpace(state, px, sgroup, tmp, where);
496                                         }
497                                 }
498                         }
499
500                         double priLogArea = logAreaProduct(state, quad, primaryDims);
501 #pragma omp critical(EstepUpdate)
502                         for (int px=0; px < numUnique; px++) {
503                                 double tmp = exp(allSlxk[px] + priLogArea);
504                                 patternLik[px] += tmp;  // is it faster to make this line atomic? TODO
505                         }
506                 }
507         }
508
509         int *numIdentical = state->numIdentical;
510
511         if(0) {
512                 Rprintf("weight\n");
513                 for (int px=0; px < numUnique; px++) {
514                         double weight = numIdentical[px] / patternLik[px];
515                         Rprintf("%20.20f\n", weight);
516                 }
517
518                 Rprintf("per item mean\n");
519                 for (int px=0; px < numUnique; px++) {
520                         Rprintf("[%d] %20.20f\n", px, latentMean[px * maxAbilities]);
521                 }
522         }
523
524         for (int px=0; px < numUnique; px++) {
525                 double weight = numIdentical[px] / patternLik[px];
526                 for (int d1=0; d1 < primaryDims; d1++) {
527                         latentMean[px * maxAbilities + d1] *= weight;
528                         for (int d2=0; d2 <= d1; d2++) {
529                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
530                                 latentCov[loc] *= weight;
531                         }
532                 }
533                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
534                         latentMean[px * maxAbilities + sdim] *= weight;
535                         int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
536                         latentCov[loc] *= weight;
537                 }
538 #if 0
539                 if (!isfinite(patternLik[px])) {
540                         error("Likelihood of row %d is %f", state->rowMap[px], patternLik[px]);
541                 }
542 #endif
543                 patternLik[px] = log(patternLik[px]);
544         }
545
546         for (int px=1; px < numUnique; px++) {
547                 for (int d1=0; d1 < primaryDims; d1++) {
548                         latentMean[d1] += latentMean[px * maxAbilities + d1];
549                         for (int d2=0; d2 <= d1; d2++) {
550                                 int cell = d2 * maxAbilities + d1;
551                                 int loc = px * maxAbilities * maxAbilities + cell;
552                                 latentCov[cell] += latentCov[loc];
553                         }
554                 }
555                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
556                         latentMean[sdim] += latentMean[px * maxAbilities + sdim];
557                         int cell = sdim * maxAbilities + sdim;
558                         int loc = px * maxAbilities * maxAbilities + cell;
559                         latentCov[cell] += latentCov[loc];
560                 }
561         }
562
563         //pda(latentMean, state->maxAbilities, 1);
564         //pda(latentCov, state->maxAbilities, state->maxAbilities);
565
566         // only consider reference group TODO
567         omxData *data = state->data;
568         for (int d1=0; d1 < maxAbilities; d1++) {
569                 latentMean[d1] /= data->rows;
570         }
571
572         for (int d1=0; d1 < primaryDims; d1++) {
573                 for (int d2=0; d2 <= d1; d2++) {
574                         int cell = d2 * maxAbilities + d1;
575                         int tcell = d1 * maxAbilities + d2;
576                         latentCov[tcell] = latentCov[cell] =
577                                 latentCov[cell] / data->rows - latentMean[d1] * latentMean[d2];
578                 }
579         }
580         for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
581                 int cell = sdim * maxAbilities + sdim;
582                 latentCov[cell] = latentCov[cell] / data->rows - latentMean[sdim] * latentMean[sdim];
583         }
584
585         if (state->converged) return;
586
587         //pda(latentMean, state->maxAbilities, 1);
588         //pda(latentCov, state->maxAbilities, state->maxAbilities);
589
590         memcpy(state->latentMeanOut, state->latentMean, maxAbilities * sizeof(double));
591         memcpy(state->latentCovOut, state->latentCov, maxAbilities * maxAbilities * sizeof(double));
592
593         const char triangle = 'L';
594         F77_CALL(dpotrf)(&triangle, &maxAbilities, latentCov, &maxAbilities, &state->choleskyError);
595         if (state->choleskyError != 0) {
596                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
597         }
598 }
599
600 static void
601 schilling_bock_2005_rescale(omxExpectation *oo)
602 {
603         omxBA81State *state = (omxBA81State*) oo->argStruct;
604         omxMatrix *itemSpec = state->itemSpec;
605         omxMatrix *itemParam = state->itemParam;
606         omxMatrix *design = state->design;
607         double *latentMean = state->latentMean;
608         double *latentCov = state->latentCov;
609         double *latentMean1 = state->latentMean1;
610         double *latentCov1 = state->latentCov1;
611         int maxAbilities = state->maxAbilities;
612         int maxDims = state->maxDims;
613
614         //Rprintf("schilling bock\n");
615         //pda(latentMean, maxAbilities, 1);
616         //pda(latentCov, maxAbilities, maxAbilities);
617         //omxPrint(design, "design");
618
619         if (state->choleskyError != 0) return;
620
621         int numItems = itemParam->cols;
622         for (int ix=0; ix < numItems; ix++) {
623                 const double *spec = omxMatrixColumn(itemSpec, ix);
624                 int id = spec[RPF_ISpecID];
625                 const double *rawDesign = omxMatrixColumn(design, ix);
626                 int idesign[design->rows];
627                 int idx = 0;
628                 for (int dx=0; dx < design->rows; dx++) {
629                         if (isfinite(rawDesign[dx])) {
630                                 idesign[idx++] = rawDesign[dx]-1;
631                         } else {
632                                 idesign[idx++] = -1;
633                         }
634                 }
635                 for (int d1=0; d1 < idx; d1++) {
636                         if (idesign[d1] == -1) {
637                                 latentMean1[d1] = 0;
638                         } else {
639                                 latentMean1[d1] = latentMean[idesign[d1]];
640                         }
641                         for (int d2=0; d2 <= d1; d2++) {
642                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
643                                         latentCov1[d2 * maxDims + d1] = 0;
644                                 } else {
645                                         latentCov1[d2 * maxDims + d1] =
646                                                 latentCov[idesign[d2] * maxAbilities + idesign[d1]];
647                                 }
648                         }
649                 }
650                 if (1) {  // make optional TODO
651                         for (int d1=idx; d1 < maxDims; d1++) latentMean1[d1] = nan("");
652                         for (int d1=0; d1 < maxDims; d1++) {
653                                 for (int d2=0; d2 < maxDims; d2++) {
654                                         if (d1 < idx && d2 < idx) continue;
655                                         latentCov1[d2 * maxDims + d1] = nan("");
656                                 }
657                         }
658                 }
659                 //pda(latentMean1, maxDims, 1);
660                 //pda(latentCov1, maxDims, maxDims);
661                 double *iparam = omxMatrixColumn(itemParam, ix);
662                 int *mask = state->paramMap + state->derivPadSize * ix;
663                 rpf_model[id].rescale(spec, iparam, mask, latentMean1, latentCov1);
664         }
665
666         // This is an egregious hack :-)
667         omxState* currentState = oo->currentState;
668         int numFreeParams = currentState->numFreeParams;
669         double param[numFreeParams];
670         for (int rx=0; rx < itemParam->rows; rx++) {
671                 for (int cx=0; cx < itemParam->cols; cx++) {
672                         int vx = state->paramMap[cx * state->derivPadSize + rx];
673                         if (vx >= 0 && vx < numFreeParams) {
674                                 // Look for gradient entries to determine what to copy.
675                                 // I said it was a hack, no?
676                                 param[vx] = omxMatrixElement(itemParam, rx, cx);
677                         }
678                 }
679         }
680         handleFreeVarList(currentState, param, numFreeParams);
681 }
682
683 static void
684 ba81Estep(omxExpectation *oo) {
685         omxBA81State *state = (omxBA81State*) oo->argStruct;
686         if (state->userEitemParam) {
687                 state->userEitemParam = FALSE;
688         } else {
689                 omxCopyMatrix(state->EitemParam, state->itemParam);
690         }
691         ba81Estep1(oo);
692         if (state->doRescale) schilling_bock_2005_rescale(oo);
693 }
694
695 OMXINLINE static void
696 expectedUpdate(omxData *restrict data, const int *rowMap, const int px, const int item,
697                const double observed, const int outcomes, double *out)
698 {
699         int pick = omxIntDataElementUnsafe(data, rowMap[px], item);
700         if (pick == NA_INTEGER) {
701                 double slice = exp(observed - log(outcomes));
702                 for (int ox=0; ox < outcomes; ox++) {
703                         out[ox] += slice;
704                 }
705         } else {
706                 out[pick-1] += exp(observed);
707         }
708 }
709
710 /** 
711  * \param quad a vector that indexes into a multidimensional quadrature
712  * \param out points to an array numOutcomes wide
713  */
714 OMXINLINE static void
715 ba81Weight(omxExpectation* oo, const int item, const int *quad, int outcomes, double *out)
716 {
717         // TODO move this to the E step and discard the lxk cache ASAP
718         omxBA81State *state = (omxBA81State*) oo->argStruct;
719         omxData *data = state->data;
720         const int *rowMap = state->rowMap;
721         int specific = state->Sgroup[item];
722         double *patternLik = state->patternLik;
723         double *logNumIdentical = state->logNumIdentical;
724         int numUnique = state->numUnique;
725         int numSpecific = state->numSpecific;
726         int sDim = state->maxDims-1;
727
728         OMXZERO(out, outcomes);
729
730         if (numSpecific == 0) {
731                 double *lxk = ba81LikelihoodFast(oo, specific, quad);
732                 for (int px=0; px < numUnique; px++) {
733                         double observed = logNumIdentical[px] + lxk[px] - patternLik[px];
734                         expectedUpdate(data, rowMap, px, item, observed, outcomes, out);
735                 }
736         } else {
737                 double *allSlxk = CALC_ALLSLXK(state, numUnique);
738                 double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
739                 if (quad[sDim] == 0) {
740                         // allSlxk, Slxk only depend on the ordinate of the primary dimensions
741                         cai2010(oo, !state->cacheLXK, quad, allSlxk, Slxk);
742                 }
743                 double *eis = Slxk + numUnique * specific;
744
745                 // Avoid recalc with modest buffer? TODO
746                 double *lxk = ba81LikelihoodFast(oo, specific, quad);
747
748                 for (int px=0; px < numUnique; px++) {
749                         double observed = logNumIdentical[px] + (allSlxk[px] - eis[px]) +
750                                 (lxk[px] - patternLik[px]);
751                         expectedUpdate(data, rowMap, px, item, observed, outcomes, out);
752                 }
753         }
754 }
755
756 OMXINLINE static double
757 ba81Fit1Ordinate(omxExpectation* oo, const int *quad, int want)
758 {
759         omxBA81State *state = (omxBA81State*) oo->argStruct;
760         omxMatrix *itemSpec = state->itemSpec;
761         omxMatrix *itemParam = state->itemParam;
762         int numItems = itemParam->cols;
763         int maxOutcomes = state->maxOutcomes;
764         int maxDims = state->maxDims;
765         double *myDeriv = state->thrDeriv + itemParam->cols * state->derivPadSize * omx_absolute_thread_num();
766         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN);
767
768         double where[maxDims];
769         pointToWhere(state, quad, where, maxDims);
770         const double area = exp(logAreaProduct(state,  quad, maxDims));
771
772         double *outcomeProb = computeRPF(oo, itemParam, quad); // avoid malloc/free? TODO
773         if (!outcomeProb) return 0;
774
775         double thr_ll = 0;
776         for (int ix=0; ix < numItems; ix++) {
777                 const double *spec = omxMatrixColumn(itemSpec, ix);
778                 int id = spec[RPF_ISpecID];
779                 int outcomes = spec[RPF_ISpecOutcomes];
780
781                 double weight[outcomes];
782                 ba81Weight(oo, ix, quad, outcomes, weight);
783                 for (int ox=0; ox < outcomes; ox++) {
784 #if 0
785 #pragma omp critical(ba81Fit1OrdinateDebug1)
786                         if (!isfinite(outcomeProb[ix * maxOutcomes + ox])) {
787                                 pda(itemParam->data, itemParam->rows, itemParam->cols);
788                                 pda(outcomeProb, outcomes, numItems);
789                                 error("RPF produced NAs");
790                         }
791 #endif
792                         double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
793                         thr_ll += got * area;
794                 }
795
796                 if (do_deriv) {
797                         double *iparam = omxMatrixColumn(itemParam, ix);
798                         double *pad = myDeriv + ix * state->derivPadSize;
799                         (*rpf_model[id].dLL1)(spec, iparam, where, area, weight, pad);
800                 }
801         }
802
803         Free(outcomeProb);
804
805         return thr_ll;
806 }
807
808 static double
809 ba81ComputeFit1(omxExpectation* oo, int want, double *gradient, double *hessian)
810 {
811         omxBA81State *state = (omxBA81State*) oo->argStruct;
812         omxState* currentState = oo->currentState;  // only used in #pragma omp
813         omxMatrix *customPrior = state->customPrior;
814         omxMatrix *itemParam = state->itemParam;
815         omxMatrix *itemSpec = state->itemSpec;
816         int numSpecific = state->numSpecific;
817         int maxDims = state->maxDims;
818
819         double ll = 0;
820         if (customPrior) {
821                 omxRecompute(customPrior);
822                 ll = customPrior->data[0];
823         }
824
825         if (!isfinite(ll)) {
826                 omxPrint(itemParam, "item param");
827                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
828         }
829
830         if (numSpecific == 0) {
831 #pragma omp parallel for num_threads(currentState->numThreads)
832                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
833                         int quad[maxDims];
834                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
835                         double thr_ll = ba81Fit1Ordinate(oo, quad, want);
836
837 #pragma omp atomic
838                         ll += thr_ll;
839                 }
840         } else {
841                 int sDim = state->maxDims-1;
842                 long *quadGridSize = state->quadGridSize;
843
844 #pragma omp parallel for num_threads(currentState->numThreads)
845                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
846                         int quad[maxDims];
847                         decodeLocation(qx, maxDims, quadGridSize, quad);
848
849                         double thr_ll = 0;
850                         long specificPoints = quadGridSize[sDim];
851                         for (long sx=0; sx < specificPoints; sx++) {
852                                 quad[sDim] = sx;
853                                 thr_ll += ba81Fit1Ordinate(oo, quad, want);
854                         }
855 #pragma omp atomic
856                         ll += thr_ll;
857                 }
858         }
859
860         if (!customPrior && gradient) {
861                 double *deriv0 = state->thrDeriv;
862
863                 int perThread = itemParam->cols * state->derivPadSize;
864                 for (int th=1; th < getNumThreads(oo); th++) {
865                         double *thrD = state->thrDeriv + th * perThread;
866                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
867                 }
868
869                 int numItems = itemParam->cols;
870                 for (int ix=0; ix < numItems; ix++) {
871                         const double *spec = omxMatrixColumn(itemSpec, ix);
872                         int id = spec[RPF_ISpecID];
873                         double *iparam = omxMatrixColumn(itemParam, ix);
874                         double *pad = deriv0 + ix * state->derivPadSize;
875                         (*rpf_model[id].dLL2)(spec, iparam, pad);
876                 }
877
878                 int numFreeParams = currentState->numFreeParams;
879                 int numParams = itemParam->cols * state->derivPadSize;
880                 for (int ox=0; ox < numParams; ox++) {
881                         int to = state->paramMap[ox];
882                         if (to == -1) continue;
883
884                         // Need to check because this can happen if
885                         // lbounds/ubounds are not set appropriately.
886                         if (0 && !isfinite(deriv0[ox])) {
887                                 int item = ox / itemParam->rows;
888                                 Rprintf("item parameters:\n");
889                                 const double *spec = omxMatrixColumn(itemSpec, item);
890                                 int id = spec[RPF_ISpecID];
891                                 int numParam = (*rpf_model[id].numParam)(spec);
892                                 double *iparam = omxMatrixColumn(itemParam, item);
893                                 pda(iparam, numParam, 1);
894                                 // Perhaps bounds can be pulled in from librpf? TODO
895                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
896                                       ox, item, deriv0[ox]);
897                         }
898
899                         if (to < numFreeParams) {
900                                 gradient[to] += -2 * deriv0[ox];
901                         } else {
902                                 if (hessian) hessian[to - numFreeParams] += -2 * deriv0[ox];
903                         }
904                 }
905         }
906
907         return -2 * ll;
908 }
909
910 double
911 ba81ComputeFit(omxExpectation* oo, int want, double *gradient, double *hessian)
912 {
913         if (!want) return 0;
914
915         omxBA81State *state = (omxBA81State*) oo->argStruct;
916         omxState* currentState = oo->currentState;
917
918         if (want & FF_COMPUTE_FIT) {
919                 ++state->fitCount;
920         }
921
922         if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
923                 ++state->gradientCount;
924
925                 int numFreeParams = currentState->numFreeParams;
926                 OMXZERO(gradient, numFreeParams);
927                 if (hessian) OMXZERO(hessian, numFreeParams * numFreeParams);
928
929                 omxMatrix *itemParam = state->itemParam;
930                 OMXZERO(state->thrDeriv, state->derivPadSize * itemParam->cols * getNumThreads(oo));
931         }
932
933         double got = ba81ComputeFit1(oo, want, gradient, hessian);
934         return got;
935 }
936
937 static void
938 ba81SetupQuadrature(omxExpectation* oo, int numPoints, double *points, double *area)
939 {
940         omxBA81State *state = (omxBA81State *) oo->argStruct;
941         int numUnique = state->numUnique;
942         int numThreads = getNumThreads(oo);
943
944         state->numQpoints = numPoints;
945
946         Free(state->Qpoint);
947         Free(state->Qarea);
948         state->Qpoint = Realloc(NULL, numPoints, double);
949         state->Qarea = Realloc(NULL, numPoints, double);
950         memcpy(state->Qpoint, points, sizeof(double)*numPoints);
951         memcpy(state->Qarea, area, sizeof(double)*numPoints);
952
953         Free(state->logQarea);
954
955         state->logQarea = Realloc(NULL, state->numQpoints, double);
956         for (int px=0; px < state->numQpoints; px++) {
957                 state->logQarea[px] = log(state->Qarea[px]);
958         }
959
960         state->totalQuadPoints = 1;
961         state->totalPrimaryPoints = 1;
962         state->quadGridSize = (long*) R_alloc(state->maxDims, sizeof(long));
963         for (int dx=0; dx < state->maxDims; dx++) {
964                 state->quadGridSize[dx] = state->numQpoints;
965                 state->totalQuadPoints *= state->quadGridSize[dx];
966                 if (dx < state->maxDims-1) {
967                         state->totalPrimaryPoints *= state->quadGridSize[dx];
968                 }
969         }
970
971         Free(state->lxk);
972
973         if (!state->cacheLXK) {
974                 state->lxk = Realloc(NULL, numUnique * numThreads, double);
975         } else {
976                 int ns = state->numSpecific;
977                 if (ns == 0) ns = 1;
978                 state->lxk = Realloc(NULL, numUnique * state->totalQuadPoints * ns, double);
979         }
980 }
981
982 static void
983 ba81EAP1(omxExpectation *oo, double *workspace, long qx, int maxDims, int numUnique,
984          double *ability, double *cov, double *spstats)
985 {
986         omxBA81State *state = (omxBA81State *) oo->argStruct;
987         double *patternLik = state->patternLik;
988         int quad[maxDims];
989         decodeLocation(qx, maxDims, state->quadGridSize, quad);
990         double where[maxDims];
991         pointToWhere(state, quad, where, maxDims);
992         double logArea = logAreaProduct(state, quad, maxDims);
993         double *lxk = ba81LikelihoodFast(oo, 0, quad);
994         double *myspace = workspace + 2 * maxDims * numUnique * omx_absolute_thread_num();
995
996         for (int px=0; px < numUnique; px++) {
997                 double *piece = myspace + px * 2 * maxDims;
998                 double plik = exp(lxk[px] + logArea - patternLik[px]);
999                 for (int dx=0; dx < maxDims; dx++) {
1000                         piece[dx] = where[dx] * plik;
1001                 }
1002                 /*
1003                 for (int d1=0; d1 < maxDims; d1++) {
1004                         for (int d2=0; d2 <= d1; d2++) {
1005                                 covPiece[d1 * maxDims + d2] += piece[d1] * piece[d2];
1006                         }
1007                 }
1008                 */
1009         }
1010 #pragma omp critical(EAP1Update)
1011         for (int px=0; px < numUnique; px++) {
1012                 double *piece = myspace + px * 2 * maxDims;
1013                 double *arow = ability + px * 2 * maxDims;
1014                 for (int dx=0; dx < maxDims; dx++) {
1015                         arow[dx*2] += piece[dx];
1016                 }
1017                 /*
1018                 for (int d1=0; d1 < maxDims; d1++) {
1019                         for (int d2=0; d2 <= d1; d2++) {
1020                                 int loc = d1 * maxDims + d2;
1021                                 cov[loc] += covPiece[loc];
1022                         }
1023                 }
1024                 */
1025         }
1026 }
1027
1028 static void
1029 ba81EAP2(omxExpectation *oo, double *workspace, long qx, int maxDims, int numUnique,
1030          double *ability, double *spstats)
1031 {
1032         omxBA81State *state = (omxBA81State *) oo->argStruct;
1033         double *patternLik = state->patternLik;
1034         int quad[maxDims];
1035         decodeLocation(qx, maxDims, state->quadGridSize, quad);
1036         double where[maxDims];
1037         pointToWhere(state, quad, where, maxDims);
1038         double logArea = logAreaProduct(state, quad, maxDims);
1039         double *lxk = ba81LikelihoodFast(oo, 0, quad);
1040
1041         for (int px=0; px < numUnique; px++) {
1042                 double psd[maxDims];
1043                 double *arow = ability + px * 2 * maxDims;
1044                 for (int dx=0; dx < maxDims; dx++) {
1045                         // is this just sqrt(variance) and redundant with the covariance matrix? TODO
1046                         double ldiff = log(fabs(where[dx] - arow[dx*2]));
1047                         psd[dx] = exp(2 * ldiff + lxk[px] + logArea - patternLik[px]);
1048                 }
1049 #pragma omp critical(EAP1Update)
1050                 for (int dx=0; dx < maxDims; dx++) {
1051                         arow[dx*2+1] += psd[dx];
1052                 }
1053         }
1054 }
1055
1056 /**
1057  * MAP is not affected by the number of items. EAP is. Likelihood can
1058  * get concentrated in a single quadrature ordinate. For 3PL, response
1059  * patterns can have a bimodal likelihood. This will confuse MAP and
1060  * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
1061  *
1062  * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
1063  * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
1064  * (pp 73-140). Lawrence Erlbaum Associates, Inc.
1065  */
1066 omxRListElement *
1067 ba81EAP(omxExpectation *oo, int *numReturns)
1068 {
1069         omxBA81State *state = (omxBA81State *) oo->argStruct;
1070         state->converged = 1;
1071         int maxDims = state->maxDims;
1072         //int numSpecific = state->numSpecific;
1073
1074         *numReturns = 3; // + (maxDims > 1) + (numSpecific > 1);
1075         omxRListElement *out = (omxRListElement*) R_alloc(*numReturns, sizeof(omxRListElement));
1076         int ox=0;
1077
1078         out[ox].numValues = -1;
1079         strcpy(out[ox].label, "latent.cov");
1080         out[ox].rows = state->maxAbilities;
1081         out[ox].cols = state->maxAbilities;
1082         out[ox].values = state->latentCovOut;
1083         ++ox;
1084
1085         out[ox].numValues = state->maxAbilities;
1086         strcpy(out[ox].label, "latent.mean");
1087         out[ox].values = state->latentMeanOut;
1088         ++ox;
1089
1090         omxData *data = state->data;
1091         int numUnique = state->numUnique;
1092
1093         // TODO Wainer & Thissen. (1987). Estimating ability with the wrong
1094         // model. Journal of Educational Statistics, 12, 339-368.
1095
1096         int numQpoints = state->numQpoints * 2;  // make configurable TODO
1097
1098         if (numQpoints < 1 + 2.0 * sqrt(state->itemSpec->cols)) {
1099                 // Thissen & Orlando (2001, p. 136)
1100                 warning("EAP requires at least 2*sqrt(items) quadrature points");
1101         }
1102
1103         double Qpoint[numQpoints];
1104         double Qarea[numQpoints];
1105         const double Qwidth = 4;
1106         for (int qx=0; qx < numQpoints; qx++) {
1107                 Qpoint[qx] = Qwidth - qx * Qwidth*2 / (numQpoints-1);
1108                 Qarea[qx] = 1.0/numQpoints;
1109         }
1110         ba81SetupQuadrature(oo, numQpoints, Qpoint, Qarea);
1111         ba81Estep1(oo);   // recalc patternLik with a flat prior
1112
1113         double *cov = NULL;
1114         /*
1115         if (maxDims > 1) {
1116                 strcpy(out[2].label, "ability.cov");
1117                 out[2].numValues = -1;
1118                 out[2].rows = maxDims;
1119                 out[2].cols = maxDims;
1120                 out[2].values = (double*) R_alloc(out[2].rows * out[2].cols, sizeof(double));
1121                 cov = out[2].values;
1122                 OMXZERO(cov, out[2].rows * out[2].cols);
1123         }
1124         */
1125         double *spstats = NULL;
1126         /*
1127         if (numSpecific) {
1128                 strcpy(out[3].label, "specific");
1129                 out[3].numValues = -1;
1130                 out[3].rows = numSpecific;
1131                 out[3].cols = 2;
1132                 out[3].values = (double*) R_alloc(out[3].rows * out[3].cols, sizeof(double));
1133                 spstats = out[3].values;
1134         }
1135         */
1136
1137         // allocation of workspace could be optional
1138         int numThreads = getNumThreads(oo);
1139         double *workspace = Realloc(NULL, numUnique * maxDims * 2 * numThreads, double);
1140
1141         // Need a separate work space because the destination needs
1142         // to be in unsorted order with duplicated rows.
1143         double *ability = Calloc(numUnique * maxDims * 2, double);
1144
1145 #pragma omp parallel for num_threads(oo->currentState->numThreads)
1146         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1147                 ba81EAP1(oo, workspace, qx, maxDims, numUnique, ability, cov, spstats);
1148         }
1149
1150         /*
1151         // make symmetric
1152         for (int d1=0; d1 < maxDims; d1++) {
1153                 for (int d2=0; d2 < d1; d2++) {
1154                         cov[d2 * maxDims + d1] = cov[d1 * maxDims + d2];
1155                 }
1156         }
1157         */
1158
1159 #pragma omp parallel for num_threads(oo->currentState->numThreads)
1160         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1161                 ba81EAP2(oo, workspace, qx, maxDims, numUnique, ability, spstats);
1162         }
1163
1164         for (int px=0; px < numUnique; px++) {
1165                 double *arow = ability + px * 2 * maxDims;
1166                 for (int dx=0; dx < maxDims; dx++) {
1167                         arow[dx*2+1] = sqrt(arow[dx*2+1]);
1168                 }
1169         }
1170
1171         strcpy(out[ox].label, "ability");
1172         out[ox].numValues = -1;
1173         out[ox].rows = data->rows;
1174         out[ox].cols = 2 * maxDims;
1175         out[ox].values = (double*) R_alloc(out[ox].rows * out[ox].cols, sizeof(double));
1176
1177         for (int rx=0; rx < numUnique; rx++) {
1178                 double *pa = ability + rx * 2 * maxDims;
1179
1180                 int dups = omxDataNumIdenticalRows(state->data, state->rowMap[rx]);
1181                 for (int dup=0; dup < dups; dup++) {
1182                         int dest = omxDataIndex(data, state->rowMap[rx]+dup);
1183                         int col=0;
1184                         for (int dx=0; dx < maxDims; dx++) {
1185                                 out[ox].values[col * out[ox].rows + dest] = pa[col]; ++col;
1186                                 out[ox].values[col * out[ox].rows + dest] = pa[col]; ++col;
1187                         }
1188                 }
1189         }
1190         Free(ability);
1191         Free(workspace);
1192         ++ox;
1193
1194         for (int ix=0; ix < state->itemParam->cols; ix++) {
1195                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1196                 int id = spec[RPF_ISpecID];
1197                 double *param = omxMatrixColumn(state->itemParam, ix);
1198                 rpf_model[id].postfit(spec, param);
1199         }
1200
1201         return out;
1202 }
1203
1204 static void ba81Destroy(omxExpectation *oo) {
1205         if(OMX_DEBUG) {
1206                 Rprintf("Freeing %s function.\n", NAME);
1207         }
1208         omxBA81State *state = (omxBA81State *) oo->argStruct;
1209         //Rprintf("fit %d gradient %d\n", state->fitCount, state->gradientCount);
1210         omxFreeAllMatrixData(state->itemSpec);
1211         omxFreeAllMatrixData(state->itemParam);
1212         omxFreeAllMatrixData(state->EitemParam);
1213         omxFreeAllMatrixData(state->design);
1214         omxFreeAllMatrixData(state->customPrior);
1215         Free(state->logNumIdentical);
1216         Free(state->numIdentical);
1217         Free(state->Qpoint);
1218         Free(state->Qarea);
1219         Free(state->logQarea);
1220         Free(state->rowMap);
1221         Free(state->patternLik);
1222         Free(state->lxk);
1223         Free(state->Slxk);
1224         Free(state->allSlxk);
1225         Free(state->Sgroup);
1226         Free(state->paramMap);
1227         Free(state->thrDeriv);
1228         Free(state->latentMean);
1229         Free(state->latentCov);
1230         Free(state->latentMean1);
1231         Free(state->latentCov1);
1232         Free(state->latentMeanOut);
1233         Free(state->latentCovOut);
1234         Free(state);
1235 }
1236
1237 void omxInitExpectationBA81(omxExpectation* oo) {
1238         omxState* currentState = oo->currentState;      
1239         SEXP rObj = oo->rObj;
1240         SEXP tmp;
1241         
1242         if(OMX_DEBUG) {
1243                 Rprintf("Initializing %s.\n", NAME);
1244         }
1245         if (!rpf_model) {
1246                 const int wantVersion = 3;
1247                 int version;
1248                 get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
1249                 (*get_librpf)(&version, &rpf_numModels, &rpf_model);
1250                 if (version < wantVersion) error("librpf binary API %d installed, at least %d is required",
1251                                                  version, wantVersion);
1252         }
1253         
1254         omxBA81State *state = Calloc(1, omxBA81State);
1255         oo->argStruct = (void*) state;
1256
1257         PROTECT(tmp = GET_SLOT(rObj, install("data")));
1258         state->data = omxDataLookupFromState(tmp, currentState);
1259
1260         if (strcmp(omxDataType(state->data), "raw") != 0) {
1261                 omxRaiseErrorf(currentState, "%s unable to handle data type %s", NAME, omxDataType(state->data));
1262                 return;
1263         }
1264
1265         state->itemSpec =
1266                 omxNewMatrixFromSlot(rObj, currentState, "ItemSpec");
1267         state->design =
1268                 omxNewMatrixFromSlot(rObj, currentState, "Design");
1269         state->itemParam =
1270                 omxNewMatrixFromSlot(rObj, currentState, "ItemParam");
1271         state->EitemParam =
1272                 omxNewMatrixFromSlot(rObj, currentState, "EItemParam");
1273         if (state->EitemParam) {
1274                 state->userEitemParam = TRUE;
1275         } else {
1276                 state->EitemParam =
1277                         omxInitTemporaryMatrix(NULL, state->itemParam->rows, state->itemParam->cols,
1278                                                TRUE, currentState);
1279         }
1280
1281         state->customPrior =
1282                 omxNewMatrixFromSlot(rObj, currentState, "CustomPrior");
1283         
1284         oo->computeFun = ba81Estep;
1285         oo->destructFun = ba81Destroy;
1286         
1287         // TODO: Exactly identical rows do not contribute any information.
1288         // The sorting algorithm ought to remove them so we don't waste RAM.
1289         // The following summary stats would be cheaper to calculate too.
1290
1291         int numUnique = 0;
1292         omxData *data = state->data;
1293         if (omxDataNumFactor(data) != data->cols) {
1294                 // verify they are ordered factors TODO
1295                 omxRaiseErrorf(currentState, "%s: all columns must be factors", NAME);
1296                 return;
1297         }
1298
1299         for (int rx=0; rx < data->rows;) {
1300                 rx += omxDataNumIdenticalRows(state->data, rx);
1301                 ++numUnique;
1302         }
1303         state->numUnique = numUnique;
1304
1305         state->rowMap = Realloc(NULL, numUnique, int);
1306         state->numIdentical = Realloc(NULL, numUnique, int);
1307         state->logNumIdentical = Realloc(NULL, numUnique, double);
1308
1309         int numItems = state->itemParam->cols;
1310         if (data->cols != numItems) {
1311                 error("Data has %d columns for %d items", data->cols, numItems);
1312         }
1313
1314         for (int rx=0, ux=0; rx < data->rows; ux++) {
1315                 if (rx == 0) {
1316                         // all NA rows will sort to the top
1317                         int na=0;
1318                         for (int ix=0; ix < numItems; ix++) {
1319                                 if (omxIntDataElement(data, 0, ix) == NA_INTEGER) { ++na; }
1320                         }
1321                         if (na == numItems) {
1322                                 omxRaiseErrorf(currentState, "Remove rows with all NAs");
1323                                 return;
1324                         }
1325                 }
1326                 int dups = omxDataNumIdenticalRows(state->data, rx);
1327                 state->numIdentical[ux] = dups;
1328                 state->logNumIdentical[ux] = log(dups);
1329                 state->rowMap[ux] = rx;
1330                 rx += dups;
1331         }
1332
1333         state->patternLik = Realloc(NULL, numUnique, double);
1334
1335         int numThreads = getNumThreads(oo);
1336
1337         int maxSpec = 0;
1338         int maxParam = 0;
1339         state->maxDims = 0;
1340         state->maxOutcomes = 0;
1341
1342         for (int ix=0; ix < numItems; ix++) {
1343                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1344                 int id = spec[RPF_ISpecID];
1345                 if (id < 0 || id >= rpf_numModels) {
1346                         omxRaiseErrorf(currentState, "ItemSpec column %d has unknown item model %d", ix, id);
1347                         return;
1348                 }
1349
1350                 double *param = omxMatrixColumn(state->itemParam, ix);
1351                 rpf_model[id].prefit(spec, param);
1352         }
1353
1354         for (int cx = 0; cx < data->cols; cx++) {
1355                 const double *spec = omxMatrixColumn(state->itemSpec, cx);
1356                 int id = spec[RPF_ISpecID];
1357                 int dims = spec[RPF_ISpecDims];
1358                 if (state->maxDims < dims)
1359                         state->maxDims = dims;
1360
1361                 // TODO verify that item model can have requested number of outcomes
1362                 int no = spec[RPF_ISpecOutcomes];
1363                 if (state->maxOutcomes < no)
1364                         state->maxOutcomes = no;
1365
1366                 // TODO this summary stat should be available from omxData
1367                 int dataMax=0;
1368                 for (int rx=0; rx < data->rows; rx++) {
1369                         int pick = omxIntDataElementUnsafe(data, rx, cx);
1370                         if (dataMax < pick)
1371                                 dataMax = pick;
1372                 }
1373                 if (dataMax > no) {
1374                         error("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
1375                 } else if (dataMax < no) {
1376                         warning("Data for item %d has only %d outcomes, not %d", cx+1, dataMax, no);
1377                 }
1378
1379                 int numSpec = (*rpf_model[id].numSpec)(spec);
1380                 if (maxSpec < numSpec)
1381                         maxSpec = numSpec;
1382
1383                 int numParam = (*rpf_model[id].numParam)(spec);
1384                 if (maxParam < numParam)
1385                         maxParam = numParam;
1386         }
1387
1388         if (state->itemSpec->cols != data->cols || state->itemSpec->rows != maxSpec) {
1389                 omxRaiseErrorf(currentState, "ItemSpec must have %d item columns and %d rows",
1390                                data->cols, maxSpec);
1391                 return;
1392         }
1393         if (state->itemParam->rows != maxParam) {
1394                 omxRaiseErrorf(currentState, "ItemParam should have %d rows", maxParam);
1395                 return;
1396         }
1397
1398         state->derivPadSize = maxParam + maxParam*(1+maxParam)/2;
1399
1400         if (state->design == NULL) {
1401                 state->maxAbilities = state->maxDims;
1402                 state->design = omxInitTemporaryMatrix(NULL, state->maxDims, numItems,
1403                                        TRUE, currentState);
1404                 for (int ix=0; ix < numItems; ix++) {
1405                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1406                         int dims = spec[RPF_ISpecDims];
1407                         for (int dx=0; dx < state->maxDims; dx++) {
1408                                 omxSetMatrixElement(state->design, dx, ix, dx < dims? (double)dx+1 : nan(""));
1409                         }
1410                 }
1411         } else {
1412                 omxMatrix *design = state->design;
1413                 if (design->cols != numItems ||
1414                     design->rows != state->maxDims) {
1415                         omxRaiseErrorf(currentState, "Design matrix should have %d rows and %d columns",
1416                                        state->maxDims, numItems);
1417                         return;
1418                 }
1419
1420                 state->maxAbilities = 0;
1421                 for (int ix=0; ix < design->rows * design->cols; ix++) {
1422                         double got = design->data[ix];
1423                         if (!R_FINITE(got)) continue;
1424                         if (round(got) != (int)got) error("Design matrix can only contain integers"); // TODO better way?
1425                         if (state->maxAbilities < got)
1426                                 state->maxAbilities = got;
1427                 }
1428                 for (int ix=0; ix < design->cols; ix++) {
1429                         const double *idesign = omxMatrixColumn(design, ix);
1430                         int ddim = 0;
1431                         for (int rx=0; rx < design->rows; rx++) {
1432                                 if (isfinite(idesign[rx])) ddim += 1;
1433                         }
1434                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1435                         int dims = spec[RPF_ISpecDims];
1436                         if (ddim > dims) error("Item %d has %d dims but design assigns %d", ix, dims, ddim);
1437                 }
1438         }
1439         if (state->maxAbilities <= state->maxDims) {
1440                 state->Sgroup = Calloc(numItems, int);
1441         } else {
1442                 // Not sure if this is correct, revisit TODO
1443                 int Sgroup0 = -1;
1444                 state->Sgroup = Realloc(NULL, numItems, int);
1445                 for (int dx=0; dx < state->maxDims; dx++) {
1446                         for (int ix=0; ix < numItems; ix++) {
1447                                 int ability = omxMatrixElement(state->design, dx, ix);
1448                                 if (dx < state->maxDims - 1) {
1449                                         if (Sgroup0 <= ability)
1450                                                 Sgroup0 = ability+1;
1451                                         continue;
1452                                 }
1453                                 int ss=-1;
1454                                 if (ability >= Sgroup0) {
1455                                         if (ss == -1) {
1456                                                 ss = ability;
1457                                         } else {
1458                                                 omxRaiseErrorf(currentState, "Item %d cannot belong to more than "
1459                                                                "1 specific dimension (both %d and %d)",
1460                                                                ix, ss, ability);
1461                                                 return;
1462                                         }
1463                                 }
1464                                 if (ss == -1) ss = Sgroup0;
1465                                 state->Sgroup[ix] = ss - Sgroup0;
1466                         }
1467                 }
1468                 state->numSpecific = state->maxAbilities - state->maxDims + 1;
1469                 state->allSlxk = Realloc(NULL, numUnique * numThreads, double);
1470                 state->Slxk = Realloc(NULL, numUnique * state->numSpecific * numThreads, double);
1471         }
1472
1473         PROTECT(tmp = GET_SLOT(rObj, install("doRescale")));
1474         state->doRescale = asLogical(tmp);
1475
1476         PROTECT(tmp = GET_SLOT(rObj, install("cache")));
1477         state->cacheLXK = asLogical(tmp);
1478
1479         PROTECT(tmp = GET_SLOT(rObj, install("GHpoints")));
1480         double *qpoints = REAL(tmp);
1481         int numQPoints = length(tmp);
1482
1483         PROTECT(tmp = GET_SLOT(rObj, install("GHarea")));
1484         double *qarea = REAL(tmp);
1485         if (numQPoints != length(tmp)) error("length(GHpoints) != length(GHarea)");
1486
1487         ba81SetupQuadrature(oo, numQPoints, qpoints, qarea);
1488
1489         state->latentMean = Realloc(NULL, state->maxAbilities * numUnique, double);
1490         state->latentCov = Realloc(NULL, state->maxAbilities * state->maxAbilities * numUnique, double);
1491         state->latentMean1 = Realloc(NULL, state->maxDims, double);
1492         state->latentCov1 = Realloc(NULL, state->maxDims * state->maxDims, double);
1493         state->latentMeanOut = Realloc(NULL, state->maxAbilities, double);
1494         state->latentCovOut = Realloc(NULL, state->maxAbilities * state->maxAbilities, double);
1495
1496         buildParamMap(oo);
1497
1498         // verify data bounded between 1 and numOutcomes TODO
1499         // hm, looks like something could be added to omxData for column summary stats?
1500 }