ifa: Snapshot
[openmx:openmx.git] / src / omxExpectationBA81.c
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 // Consider replacing log() with log2() in some places? Not worth it?
19
20 #include "omxExpectation.h"
21 #include "omxOpenmpWrap.h"
22 #include "npsolWrap.h"
23 #include "libirt-rpf.h"
24 #include "omxOptimizer.h"  // remove TODO
25
26 static const char *NAME = "ExpectationBA81";
27
28 static const struct rpf *rpf_model = NULL;
29 static int rpf_numModels;
30
31 typedef double *(*rpf_fn_t)(omxExpectation *oo, omxMatrix *itemParam, const int *quad);
32
33 typedef struct {
34
35         // data characteristics
36         omxData *data;
37         int numUnique;
38         int *numIdentical;        // length numUnique
39         double *logNumIdentical;  // length numUnique
40         int *rowMap;              // length numUnique
41
42         // item description related
43         omxMatrix *itemSpec;
44         int maxOutcomes;
45         int maxDims;
46         int maxAbilities;
47         int numSpecific;
48         int *Sgroup;              // item's specific group 0..numSpecific-1
49         omxMatrix *design;        // items * maxDims
50
51         // quadrature related
52         int numQpoints;
53         double *Qpoint;
54         double *Qarea;
55         double *logQarea;
56         long *quadGridSize;       // maxDims
57         long totalPrimaryPoints;  // product of quadGridSize except specific dim
58         long totalQuadPoints;     // product of quadGridSize
59
60         // estimation related
61         omxMatrix *EitemParam;    // E step version
62         omxMatrix *itemParam;     // M step version
63         SEXP rpf;
64         rpf_fn_t computeRPF;
65         omxMatrix *customPrior;
66         int *paramMap;
67         double *thrGradient1;     // thread * length(itemParam)
68         double *thrGradient;      // thread * length(itemParam)
69         int cacheLXK;             // w/cache,  numUnique * #specific quad points * totalQuadPoints
70         double *lxk;              // wo/cache, numUnique * thread
71         double *allSlxk;          // numUnique * thread
72         double *Slxk;             // numUnique * #specific dimensions * thread
73         double *patternLik;       // numUnique
74         double ll;                // the most recent finite ll; TODO obsolete by lbound/ubound
75         // for multi-group, need to know the reference group TODO
76         double *latentMean;       // maxAbilities * numUnique
77         double *latentCov;        // maxAbilities * maxAbilities * numUnique ; only lower triangle is used
78         double *latentMean1;      // maxDims
79         double *latentCov1;       // maxDims * maxDims ; only lower triangle is used
80         double *latentMeanOut;    // maxAbilities
81         double *latentCovOut;     // maxAbilities * maxAbilities ; only lower triangle is used
82         int doRescale;
83         int choleskyError;
84
85         int converged;
86         int gradientCount;
87         int fitCount;
88 } omxBA81State;
89
90 static void
91 pda(const double *ar, int rows, int cols) {   // column major order
92         for (int rx=0; rx < rows; rx++) {
93                 for (int cx=0; cx < cols; cx++) {
94                         Rprintf("%.6g ", ar[cx * rows + rx]);
95                 }
96                 Rprintf("\n");
97         }
98 }
99
100 static void
101 pia(const int *ar, int rows, int cols) {   // column major order
102         for (int rx=0; rx < rows; rx++) {
103                 for (int cx=0; cx < cols; cx++) {
104                         Rprintf("%d ", ar[cx * rows + rx]);
105                 }
106                 Rprintf("\n");
107         }
108 }
109
110 static int
111 getNumThreads(omxExpectation* oo)
112 {
113         int numThreads = oo->currentState->numThreads;
114         if (numThreads < 1) numThreads = 1;
115         return numThreads;
116 }
117
118 static void buildParamMap(omxExpectation* oo)
119 {
120         omxState* currentState = oo->currentState;
121         omxBA81State *state = (omxBA81State *) oo->argStruct;
122         omxMatrix *itemParam = state->itemParam;
123         int size = itemParam->rows * itemParam->cols;
124
125         state->paramMap = Realloc(NULL, size, int);
126         for (int px=0; px < size; px++) {
127                 state->paramMap[px] = -1;
128         }
129
130         int numFreeParams = currentState->numFreeParams;
131         for (int px=0; px < numFreeParams; px++) {
132                 omxFreeVar *fv = currentState->freeVarList + px;
133                 for (int lx=0; lx < fv->numLocations; lx++) {
134                         if (~fv->matrices[lx] == itemParam->matrixNumber) {
135                                 state->paramMap[fv->col[lx] * itemParam->rows + fv->row[lx]] = px;
136                         }
137                 }
138         }
139
140         //pia(state->paramMap, itemParam->rows, itemParam->cols);
141
142         state->thrGradient = Realloc(NULL, size * getNumThreads(oo), double);
143         state->thrGradient1 = Realloc(NULL, size * getNumThreads(oo), double);
144 }
145
146 OMXINLINE static void
147 pointToWhere(omxBA81State *state, const int *quad, double *where, int upto)
148 {
149         for (int dx=0; dx < upto; dx++) {
150                 where[dx] = state->Qpoint[quad[dx]];
151         }
152 }
153
154 OMXINLINE static void
155 assignDims(omxMatrix *itemSpec, omxMatrix *design, int dims, int maxDims, int ix,
156            const double *restrict theta, double *restrict ptheta)
157 {
158         for (int dx=0; dx < dims; dx++) {
159                 int ability = (int)omxMatrixElement(design, dx, ix) - 1;
160                 if (ability >= maxDims) ability = maxDims-1;
161                 ptheta[dx] = theta[ability];
162         }
163 }
164
165 /**
166  * This is the main function needed to generate simulated data from
167  * the model. It could be argued that the rest of the estimation
168  * machinery belongs in the fitfunction.
169  *
170  * \param theta Vector of ability parameters, one per ability
171  * \returns A numItems by maxOutcomes colMajor vector of doubles. Caller must Free it.
172  */
173 static double *
174 standardComputeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
175 {
176         omxBA81State *state = (omxBA81State*) oo->argStruct;
177         omxMatrix *itemSpec = state->itemSpec;
178         int numItems = itemSpec->cols;
179         omxMatrix *design = state->design;
180         int maxDims = state->maxDims;
181
182         double theta[maxDims];
183         pointToWhere(state, quad, theta, maxDims);
184
185         double *outcomeProb = Realloc(NULL, numItems * state->maxOutcomes, double);
186         //double *outcomeProb = Calloc(numItems * state->maxOutcomes, double);
187
188         for (int ix=0; ix < numItems; ix++) {
189                 const double *spec = omxMatrixColumn(itemSpec, ix);
190                 double *iparam = omxMatrixColumn(itemParam, ix);
191                 double *out = outcomeProb + ix * state->maxOutcomes;
192                 int id = spec[RPF_ISpecID];
193                 int dims = spec[RPF_ISpecDims];
194                 double ptheta[dims];
195                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta);
196                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
197 #if 0
198                 for (int ox=0; ox < spec[RPF_ISpecOutcomes]; ox++) {
199                         if (!isfinite(out[ox]) || out[ox] > 0) {
200                                 Rprintf("spec\n");
201                                 pda(spec, itemSpec->rows, 1);
202                                 Rprintf("item param\n");
203                                 pda(iparam, itemParam->rows, 1);
204                                 Rprintf("where\n");
205                                 pda(ptheta, dims, 1);
206                                 error("RPF returned %20.20f", out[ox]);
207                         }
208                 }
209 #endif
210         }
211
212         return outcomeProb;
213 }
214
215 static double *
216 RComputeRPF1(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
217 {
218         omxBA81State *state = (omxBA81State*) oo->argStruct;
219         int maxOutcomes = state->maxOutcomes;
220         omxMatrix *design = state->design;
221         omxMatrix *itemSpec = state->itemSpec;
222         int maxDims = state->maxDims;
223
224         double theta[maxDims];
225         pointToWhere(state, quad, theta, maxDims);
226
227         SEXP invoke;
228         PROTECT(invoke = allocVector(LANGSXP, 4));
229         SETCAR(invoke, state->rpf);
230         SETCADR(invoke, omxExportMatrix(itemParam));
231         SETCADDR(invoke, omxExportMatrix(itemSpec));
232
233         SEXP where;
234         PROTECT(where = allocMatrix(REALSXP, maxDims, itemParam->cols));
235         double *ptheta = REAL(where);
236         for (int ix=0; ix < itemParam->cols; ix++) {
237                 int dims = omxMatrixElement(itemSpec, RPF_ISpecDims, ix);
238                 assignDims(itemSpec, design, dims, maxDims, ix, theta, ptheta + ix*maxDims);
239                 for (int dx=dims; dx < maxDims; dx++) {
240                         ptheta[ix*maxDims + dx] = NA_REAL;
241                 }
242         }
243         SETCADDDR(invoke, where);
244
245         SEXP matrix;
246         PROTECT(matrix = eval(invoke, R_GlobalEnv));
247
248         if (!isMatrix(matrix)) {
249                 omxRaiseError(oo->currentState, -1,
250                               "RPF must return an item by outcome matrix");
251                 return NULL;
252         }
253
254         SEXP matrixDims;
255         PROTECT(matrixDims = getAttrib(matrix, R_DimSymbol));
256         int *dimList = INTEGER(matrixDims);
257         int numItems = state->itemSpec->cols;
258         if (dimList[0] != maxOutcomes || dimList[1] != numItems) {
259                 const int errlen = 200;
260                 char errstr[errlen];
261                 snprintf(errstr, errlen, "RPF must return a %d outcomes by %d items matrix",
262                          maxOutcomes, numItems);
263                 omxRaiseError(oo->currentState, -1, errstr);
264                 return NULL;
265         }
266
267         // Unlikely to be of type INTSXP, but just to be safe
268         PROTECT(matrix = coerceVector(matrix, REALSXP));
269         double *restrict got = REAL(matrix);
270
271         // Need to copy because threads cannot share SEXP
272         double *restrict outcomeProb = Realloc(NULL, numItems * maxOutcomes, double);
273
274         // Double check there aren't NAs in the wrong place
275         for (int ix=0; ix < numItems; ix++) {
276                 int numOutcomes = omxMatrixElement(state->itemSpec, RPF_ISpecOutcomes, ix);
277                 for (int ox=0; ox < numOutcomes; ox++) {
278                         int vx = ix * maxOutcomes + ox;
279                         if (isnan(got[vx])) {
280                                 const int errlen = 200;
281                                 char errstr[errlen];
282                                 snprintf(errstr, errlen, "RPF returned NA in [%d,%d]", ox,ix);
283                                 omxRaiseError(oo->currentState, -1, errstr);
284                         }
285                         outcomeProb[vx] = got[vx];
286                 }
287         }
288
289         return outcomeProb;
290 }
291
292 static double *
293 RComputeRPF(omxExpectation *oo, omxMatrix *itemParam, const int *quad)
294 {
295         omx_omp_set_lock(&GlobalRLock);
296         PROTECT_INDEX pi = omxProtectSave();
297         double *ret = RComputeRPF1(oo, itemParam, quad);
298         omxProtectRestore(pi);
299         omx_omp_unset_lock(&GlobalRLock);  // hope there was no exception!
300         return ret;
301 }
302
303 OMXINLINE static long
304 encodeLocation(const int dims, const long *restrict grid, const int *restrict quad)
305 {
306         long qx = 0;
307         for (int dx=dims-1; dx >= 0; dx--) {
308                 qx = qx * grid[dx];
309                 qx += quad[dx];
310         }
311         return qx;
312 }
313
314 #define CALC_LXK_CACHED(state, numUnique, quad, tqp, specific) \
315         ((state)->lxk + \
316          (numUnique) * encodeLocation((state)->maxDims, (state)->quadGridSize, quad) + \
317          (numUnique) * (tqp) * (specific))
318
319 OMXINLINE static double *
320 ba81Likelihood(omxExpectation *oo, int specific, const int *restrict quad)
321 {
322         omxBA81State *state = (omxBA81State*) oo->argStruct;
323         int numUnique = state->numUnique;
324         int maxOutcomes = state->maxOutcomes;
325         omxData *data = state->data;
326         int numItems = state->itemSpec->cols;
327         rpf_fn_t rpf_fn = state->computeRPF;
328         int *restrict Sgroup = state->Sgroup;
329         double *restrict lxk;
330
331         if (!state->cacheLXK) {
332                 lxk = state->lxk + numUnique * omx_absolute_thread_num();
333         } else {
334                 lxk = CALC_LXK_CACHED(state, numUnique, quad, state->totalQuadPoints, specific);
335         }
336
337         const double *outcomeProb = (*rpf_fn)(oo, state->EitemParam, quad);
338         if (!outcomeProb) {
339                 OMXZERO(lxk, numUnique);
340                 return lxk;
341         }
342
343         const int *rowMap = state->rowMap;
344         for (int px=0; px < numUnique; px++) {
345                 double lxk1 = 0;
346                 for (int ix=0; ix < numItems; ix++) {
347                         if (specific != Sgroup[ix]) continue;
348                         int pick = omxIntDataElementUnsafe(data, rowMap[px], ix);
349                         if (pick == NA_INTEGER) continue;
350                         double piece = outcomeProb[ix * maxOutcomes + pick-1];
351                         lxk1 += piece;
352                 }
353 #if 0
354 #pragma omp critical(ba81LikelihoodDebug1)
355                 if (!isfinite(lxk1) || lxk1 > numItems) {
356                         Rprintf("where\n");
357                         double where[state->maxDims];
358                         pointToWhere(state, quad, where, state->maxDims);
359                         pda(where, state->maxDims, 1);
360                         Rprintf("prob\n");
361                         pda(outcomeProb, numItems, maxOutcomes);
362                         error("Likelihood of row %d is %f", rowMap[px], lxk1);
363                 }
364 #endif
365                 lxk[px] = lxk1;
366         }
367
368         Free(outcomeProb);
369
370         return lxk;
371 }
372
373 OMXINLINE static double *
374 ba81LikelihoodFast(omxExpectation *oo, int specific, const int *restrict quad)
375 {
376         omxBA81State *state = (omxBA81State*) oo->argStruct;
377         if (!state->cacheLXK) {
378                 return ba81LikelihoodFast(oo, specific, quad);
379         } else {
380                 return CALC_LXK_CACHED(state, state->numUnique, quad, state->totalQuadPoints, specific);
381         }
382
383 }
384
385 OMXINLINE static void
386 mapLatentSpace(omxBA81State *state, int px, int sgroup, double piece, const double *where)
387 {
388         double *latentMean = state->latentMean;
389         double *latentCov = state->latentCov;
390         int maxDims = state->maxDims;
391         int maxAbilities = state->maxAbilities;
392         int pmax = maxDims;
393         if (state->numSpecific) pmax -= 1;
394
395         if (sgroup == 0) {
396                 for (int d1=0; d1 < pmax; d1++) {
397                         double piece_w1 = piece * where[d1];
398                         int mloc = px * maxAbilities + d1;
399 #pragma omp atomic
400                         latentMean[mloc] += piece_w1;
401                         for (int d2=0; d2 <= d1; d2++) {
402                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
403                                 double piece_cov = piece_w1 * where[d2];
404 #pragma omp atomic
405                                 latentCov[loc] += piece_cov;
406                         }
407                 }
408         }
409
410         if (state->numSpecific) {
411                 int sdim = maxDims + sgroup - 1;
412
413                 double piece_w1 = piece * where[maxDims-1];
414                 int mloc = px * maxAbilities + sdim;
415 #pragma omp atomic
416                 latentMean[mloc] += piece_w1;
417
418                 int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
419                 double piece_var = piece_w1 * where[maxDims-1];
420 #pragma omp atomic
421                 latentCov[loc] += piece_var;
422         }
423 }
424
425 OMXINLINE static double
426 logAreaProduct(omxBA81State *state, const int *restrict quad, const int upto)
427 {
428         double logArea = 0;
429         for (int dx=0; dx < upto; dx++) {
430                 logArea += state->logQarea[quad[dx]];
431         }
432         return logArea;
433 }
434
435 #define CALC_ALLSLXK(state, numUnique) \
436         (state->allSlxk + omx_absolute_thread_num() * (numUnique))
437
438 #define CALC_SLXK(state, numUnique, numSpecific) \
439         (state->Slxk + omx_absolute_thread_num() * (numUnique) * (numSpecific))
440
441 OMXINLINE static void
442 cai2010(omxExpectation* oo, int recompute, const int *restrict primaryQuad,
443         double *restrict allSlxk, double *restrict Slxk)
444 {
445         omxBA81State *state = (omxBA81State*) oo->argStruct;
446         int numUnique = state->numUnique;
447         int numSpecific = state->numSpecific;
448         int maxDims = state->maxDims;
449         int sDim = maxDims-1;
450
451         int quad[maxDims];
452         memcpy(quad, primaryQuad, sizeof(int)*sDim);
453
454         OMXZERO(Slxk, numUnique * numSpecific);
455         OMXZERO(allSlxk, numUnique);
456
457         for (int sx=0; sx < numSpecific; sx++) {
458                 double *eis = Slxk + numUnique * sx;
459                 int quadGridSize = state->quadGridSize[sDim];
460
461                 for (int qx=0; qx < quadGridSize; qx++) {
462                         quad[sDim] = qx;
463                         double where[maxDims];
464                         pointToWhere(state, quad, where, maxDims);
465
466                         double *lxk;
467                         if (recompute) {
468                                 lxk = ba81Likelihood(oo, sx, quad);
469                         } else {
470                                 lxk = CALC_LXK_CACHED(state, numUnique, quad, state->totalQuadPoints, sx);
471                         }
472
473                         for (int ix=0; ix < numUnique; ix++) {
474                                 eis[ix] += exp(lxk[ix] + state->logQarea[qx]);
475                         }
476                 }
477
478                 for (int px=0; px < numUnique; px++) {
479                         eis[px] = log(eis[px]);
480                         allSlxk[px] += eis[px];
481                 }
482         }
483 }
484
485 // The idea of this API is to allow passing in a number larger than 1.
486 OMXINLINE static void
487 areaProduct(omxBA81State *state, const int *restrict quad, const int upto, double *restrict out)
488 {
489         for (int dx=0; dx < upto; dx++) {
490                 *out *= state->Qarea[quad[dx]];
491         }
492 }
493
494 OMXINLINE static void
495 decodeLocation(long qx, const int dims, const long *restrict grid,
496                int *restrict quad)
497 {
498         for (int dx=0; dx < dims; dx++) {
499                 quad[dx] = qx % grid[dx];
500                 qx = qx / grid[dx];
501         }
502 }
503
504 static void
505 ba81Estep1(omxExpectation *oo) {
506         if(OMX_DEBUG_MML) {Rprintf("Beginning %s Computation.\n", NAME);}
507
508         omxBA81State *state = (omxBA81State*) oo->argStruct;
509         double *patternLik = state->patternLik;
510         int numUnique = state->numUnique;
511         int numSpecific = state->numSpecific;
512         double *latentMean = state->latentMean;
513         double *latentCov = state->latentCov;
514         int maxDims = state->maxDims;
515         int maxAbilities = state->maxAbilities;
516         int primaryDims = maxDims;
517
518         OMXZERO(patternLik, numUnique);
519         OMXZERO(latentMean, numUnique * maxAbilities);
520         OMXZERO(latentCov, numUnique * maxAbilities * maxAbilities);
521
522         // E-step, marginalize person ability
523         //
524         // Note: In the notation of Bock & Aitkin (1981) and
525         // Cai~(2010), these loops are reversed.  That is, the inner
526         // loop is over quadrature points and the outer loop is over
527         // all response patterns.
528         //
529         if (numSpecific == 0) {
530 #pragma omp parallel for num_threads(oo->currentState->numThreads)
531                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
532                         int quad[maxDims];
533                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
534                         double where[maxDims];
535                         pointToWhere(state, quad, where, maxDims);
536
537                         double *lxk = ba81Likelihood(oo, 0, quad);
538
539                         double logArea = logAreaProduct(state, quad, maxDims);
540 #pragma omp critical(EstepUpdate)
541                         for (int px=0; px < numUnique; px++) {
542                                 double tmp = exp(lxk[px] + logArea);
543 #if 0
544                                 if (!isfinite(tmp)) {
545                                         Rprintf("where\n");
546                                         pda(where, maxDims, 1);
547                                         error("Row %d lxk %f logArea %f tmp %f",
548                                               state->rowMap[px], lxk[px], logArea, tmp);
549                                 }
550 #endif
551                                 patternLik[px] += tmp;
552                                 mapLatentSpace(state, px, 0, tmp, where);
553                         }
554                 }
555         } else {
556                 primaryDims -= 1;
557                 int sDim = primaryDims;
558                 long specificPoints = state->quadGridSize[sDim];
559
560 #pragma omp parallel for num_threads(oo->currentState->numThreads)
561                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
562                         int quad[maxDims];
563                         decodeLocation(qx, primaryDims, state->quadGridSize, quad);
564
565                         double *allSlxk = CALC_ALLSLXK(state, numUnique);
566                         double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
567                         cai2010(oo, TRUE, quad, allSlxk, Slxk);
568
569                         for (int sgroup=0; sgroup < numSpecific; sgroup++) {
570                                 double *eis = Slxk + numUnique * sgroup;
571                                 for (long sx=0; sx < specificPoints; sx++) {
572                                         quad[sDim] = sx;
573                                         double where[maxDims];
574                                         pointToWhere(state, quad, where, maxDims);
575                                         double logArea = logAreaProduct(state, quad, maxDims);
576                                         double *lxk = ba81LikelihoodFast(oo, sgroup, quad);
577                                         for (int px=0; px < numUnique; px++) {
578                                                 double tmp = exp((allSlxk[px] - eis[px]) + lxk[px] + logArea);
579                                                 mapLatentSpace(state, px, sgroup, tmp, where);
580                                         }
581                                 }
582                         }
583
584                         double priLogArea = logAreaProduct(state, quad, primaryDims);
585 #pragma omp critical(EstepUpdate)
586                         for (int px=0; px < numUnique; px++) {
587                                 double tmp = exp(allSlxk[px] + priLogArea);
588                                 patternLik[px] += tmp;  // is it faster to make this line atomic? TODO
589                         }
590                 }
591         }
592
593         int *numIdentical = state->numIdentical;
594
595         if(0) {
596                 Rprintf("weight\n");
597                 for (int px=0; px < numUnique; px++) {
598                         double weight = numIdentical[px] / patternLik[px];
599                         Rprintf("%20.20f\n", weight);
600                 }
601
602                 Rprintf("per item mean\n");
603                 for (int px=0; px < numUnique; px++) {
604                         Rprintf("[%d] %20.20f\n", px, latentMean[px * maxAbilities]);
605                 }
606         }
607
608         for (int px=0; px < numUnique; px++) {
609                 double weight = numIdentical[px] / patternLik[px];
610                 for (int d1=0; d1 < primaryDims; d1++) {
611                         latentMean[px * maxAbilities + d1] *= weight;
612                         for (int d2=0; d2 <= d1; d2++) {
613                                 int loc = px * maxAbilities * maxAbilities + d2 * maxAbilities + d1;
614                                 latentCov[loc] *= weight;
615                         }
616                 }
617                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
618                         latentMean[px * maxAbilities + sdim] *= weight;
619                         int loc = px * maxAbilities * maxAbilities + sdim * maxAbilities + sdim;
620                         latentCov[loc] *= weight;
621                 }
622 #if 0
623                 if (!isfinite(patternLik[px])) {
624                         error("Likelihood of row %d is %f", state->rowMap[px], patternLik[px]);
625                 }
626 #endif
627                 patternLik[px] = log(patternLik[px]);
628         }
629
630         for (int px=1; px < numUnique; px++) {
631                 for (int d1=0; d1 < primaryDims; d1++) {
632                         latentMean[d1] += latentMean[px * maxAbilities + d1];
633                         for (int d2=0; d2 <= d1; d2++) {
634                                 int cell = d2 * maxAbilities + d1;
635                                 int loc = px * maxAbilities * maxAbilities + cell;
636                                 latentCov[cell] += latentCov[loc];
637                         }
638                 }
639                 for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
640                         latentMean[sdim] += latentMean[px * maxAbilities + sdim];
641                         int cell = sdim * maxAbilities + sdim;
642                         int loc = px * maxAbilities * maxAbilities + cell;
643                         latentCov[cell] += latentCov[loc];
644                 }
645         }
646
647         //pda(latentMean, state->maxAbilities, 1);
648         //pda(latentCov, state->maxAbilities, state->maxAbilities);
649
650         // only consider reference group TODO
651         omxData *data = state->data;
652         for (int d1=0; d1 < maxAbilities; d1++) {
653                 latentMean[d1] /= data->rows;
654         }
655
656         for (int d1=0; d1 < primaryDims; d1++) {
657                 for (int d2=0; d2 <= d1; d2++) {
658                         int cell = d2 * maxAbilities + d1;
659                         int tcell = d1 * maxAbilities + d2;
660                         latentCov[tcell] = latentCov[cell] =
661                                 latentCov[cell] / data->rows - latentMean[d1] * latentMean[d2];
662                 }
663         }
664         for (int sdim=primaryDims; sdim < maxAbilities; sdim++) {
665                 int cell = sdim * maxAbilities + sdim;
666                 latentCov[cell] = latentCov[cell] / data->rows - latentMean[sdim] * latentMean[sdim];
667         }
668
669         if (state->converged) return;
670
671         //pda(latentMean, state->maxAbilities, 1);
672         //pda(latentCov, state->maxAbilities, state->maxAbilities);
673
674         memcpy(state->latentMeanOut, state->latentMean, maxAbilities * sizeof(double));
675         memcpy(state->latentCovOut, state->latentCov, maxAbilities * maxAbilities * sizeof(double));
676
677         const char triangle = 'L';
678         F77_CALL(dpotrf)(&triangle, &maxAbilities, latentCov, &maxAbilities, &state->choleskyError);
679         if (state->choleskyError != 0) {
680                 warning("Cholesky failed with %d; rescaling disabled", state->choleskyError); // make error TODO?
681         }
682 }
683
684 static void
685 schilling_bock_2005_rescale(omxExpectation *oo)
686 {
687         omxBA81State *state = (omxBA81State*) oo->argStruct;
688         omxMatrix *itemSpec = state->itemSpec;
689         omxMatrix *itemParam = state->itemParam;
690         omxMatrix *design = state->design;
691         double *latentMean = state->latentMean;
692         double *latentCov = state->latentCov;
693         double *latentMean1 = state->latentMean1;
694         double *latentCov1 = state->latentCov1;
695         int maxAbilities = state->maxAbilities;
696         int maxDims = state->maxDims;
697
698         //Rprintf("schilling bock\n");
699         //pda(latentMean, maxAbilities, 1);
700         //pda(latentCov, maxAbilities, maxAbilities);
701         //omxPrint(design, "design");
702
703         if (state->choleskyError != 0) return;
704
705         int numItems = itemParam->cols;
706         for (int ix=0; ix < numItems; ix++) {
707                 const double *spec = omxMatrixColumn(itemSpec, ix);
708                 int id = spec[RPF_ISpecID];
709                 const double *rawDesign = omxMatrixColumn(design, ix);
710                 int idesign[design->rows];
711                 int idx = 0;
712                 for (int dx=0; dx < design->rows; dx++) {
713                         if (isfinite(rawDesign[dx])) {
714                                 idesign[idx++] = rawDesign[dx]-1;
715                         } else {
716                                 idesign[idx++] = -1;
717                         }
718                 }
719                 for (int d1=0; d1 < idx; d1++) {
720                         if (idesign[d1] == -1) {
721                                 latentMean1[d1] = 0;
722                         } else {
723                                 latentMean1[d1] = latentMean[idesign[d1]];
724                         }
725                         for (int d2=0; d2 <= d1; d2++) {
726                                 if (idesign[d1] == -1 || idesign[d2] == -1) {
727                                         latentCov1[d2 * maxDims + d1] = 0;
728                                 } else {
729                                         latentCov1[d2 * maxDims + d1] =
730                                                 latentCov[idesign[d2] * maxAbilities + idesign[d1]];
731                                 }
732                         }
733                 }
734                 if (1) {  // make optional TODO
735                         for (int d1=idx; d1 < maxDims; d1++) latentMean1[d1] = nan("");
736                         for (int d1=0; d1 < maxDims; d1++) {
737                                 for (int d2=0; d2 < maxDims; d2++) {
738                                         if (d1 < idx && d2 < idx) continue;
739                                         latentCov1[d2 * maxDims + d1] = nan("");
740                                 }
741                         }
742                 }
743                 //pda(latentMean1, maxDims, 1);
744                 //pda(latentCov1, maxDims, maxDims);
745                 double *iparam = omxMatrixColumn(itemParam, ix);
746                 int *mask = state->paramMap + itemParam->rows * ix;
747                 rpf_model[id].rescale(spec, iparam, mask, latentMean1, latentCov1);
748         }
749
750         // this is an egregious hack :-)
751         omxState* currentState = oo->currentState;
752         int numFreeParams = currentState->numFreeParams;
753         double param[numFreeParams];
754         for (int rx=0; rx < itemParam->rows; rx++) {
755                 for (int cx=0; cx < itemParam->cols; cx++) {
756                         int vx = state->paramMap[cx * itemParam->rows + rx];
757                         if (vx == -1) continue;
758                         param[vx] = omxMatrixElement(itemParam, rx, cx);
759                 }
760         }
761         handleFreeVarList(currentState, param, numFreeParams);
762 }
763
764 static void
765 ba81Estep(omxExpectation *oo) {
766         omxBA81State *state = (omxBA81State*) oo->argStruct;
767         omxCopyMatrix(state->EitemParam, state->itemParam);
768         ba81Estep1(oo);
769         if (state->doRescale) schilling_bock_2005_rescale(oo);
770 }
771
772 OMXINLINE static void
773 expectedUpdate(omxData *restrict data, const int *rowMap, const int px, const int item,
774                const double observed, const int outcomes, double *out)
775 {
776         int pick = omxIntDataElementUnsafe(data, rowMap[px], item);
777         if (pick == NA_INTEGER) {
778                 double slice = exp(observed - log(outcomes));
779                 for (int ox=0; ox < outcomes; ox++) {
780                         out[ox] += slice;
781                 }
782         } else {
783                 out[pick-1] += exp(observed);
784         }
785 }
786
787 /** 
788  * \param quad a vector that indexes into a multidimensional quadrature
789  * \param out points to an array numOutcomes wide
790  */
791 OMXINLINE static void
792 ba81Weight(omxExpectation* oo, const int item, const int *quad, int outcomes, double *out)
793 {
794         omxBA81State *state = (omxBA81State*) oo->argStruct;
795         omxData *data = state->data;
796         const int *rowMap = state->rowMap;
797         int specific = state->Sgroup[item];
798         double *patternLik = state->patternLik;
799         double *logNumIdentical = state->logNumIdentical;
800         int numUnique = state->numUnique;
801         int numSpecific = state->numSpecific;
802         int sDim = state->maxDims-1;
803
804         OMXZERO(out, outcomes);
805
806         if (numSpecific == 0) {
807                 double *lxk = ba81LikelihoodFast(oo, specific, quad);
808                 for (int px=0; px < numUnique; px++) {
809                         double observed = logNumIdentical[px] + lxk[px] - patternLik[px];
810                         expectedUpdate(data, rowMap, px, item, observed, outcomes, out);
811                 }
812         } else {
813                 double *allSlxk = CALC_ALLSLXK(state, numUnique);
814                 double *Slxk = CALC_SLXK(state, numUnique, numSpecific);
815                 if (quad[sDim] == 0) {
816                         // allSlxk, Slxk only depend on the ordinate of the primary dimensions
817                         cai2010(oo, !state->cacheLXK, quad, allSlxk, Slxk);
818                 }
819                 double *eis = Slxk + numUnique * specific;
820
821                 // Avoid recalc with modest buffer? TODO
822                 double *lxk = ba81LikelihoodFast(oo, specific, quad);
823
824                 for (int px=0; px < numUnique; px++) {
825                         double observed = logNumIdentical[px] + (allSlxk[px] - eis[px]) +
826                                 (lxk[px] - patternLik[px]);
827                         expectedUpdate(data, rowMap, px, item, observed, outcomes, out);
828                 }
829         }
830 }
831
832 OMXINLINE static double
833 ba81Fit1Ordinate(omxExpectation* oo, const int *quad, int want)
834 {
835         omxBA81State *state = (omxBA81State*) oo->argStruct;
836         omxMatrix *itemSpec = state->itemSpec;
837         omxMatrix *itemParam = state->itemParam;
838         int numItems = itemParam->cols;
839         rpf_fn_t rpf_fn = state->computeRPF;
840         int maxOutcomes = state->maxOutcomes;
841         int maxDims = state->maxDims;
842         double *gradient = state->thrGradient1 + itemParam->rows * itemParam->cols * omx_absolute_thread_num();
843         int do_gradient = want & FF_COMPUTE_GRADIENT;
844
845         double where[maxDims];
846         pointToWhere(state, quad, where, maxDims);
847
848         double *outcomeProb = (*rpf_fn)(oo, itemParam, quad); // avoid malloc/free? TODO
849         if (!outcomeProb) return 0;
850
851         double thr_ll = 0;
852         for (int ix=0; ix < numItems; ix++) {
853                 const double *spec = omxMatrixColumn(itemSpec, ix);
854                 int id = spec[RPF_ISpecID];
855                 int outcomes = spec[RPF_ISpecOutcomes];
856
857                 double weight[outcomes];
858                 ba81Weight(oo, ix, quad, outcomes, weight);
859                 for (int ox=0; ox < outcomes; ox++) {
860 #if 0
861 #pragma omp critical(ba81Fit1OrdinateDebug1)
862                         if (!isfinite(outcomeProb[ix * maxOutcomes + ox])) {
863                                 pda(itemParam->data, itemParam->rows, itemParam->cols);
864                                 pda(outcomeProb, outcomes, numItems);
865                                 error("RPF produced NAs");
866                         }
867 #endif
868                         double got = weight[ox] * outcomeProb[ix * maxOutcomes + ox];
869                         areaProduct(state, quad, maxDims, &got);
870                         thr_ll += got;
871                 }
872
873                 if (do_gradient) {
874                         int *map = state->paramMap + itemParam->rows * ix;
875                         int mask[itemParam->rows];
876                         for (int mx=0; mx < itemParam->rows; mx++) {
877                                 mask[mx] = (map[mx] >= 0)? mx : -1;
878                         }
879                         double *iparam = omxMatrixColumn(itemParam, ix);
880                         (*rpf_model[id].gradient)(spec, iparam, mask, where, weight,
881                                                   gradient + itemParam->rows * ix);
882                 }
883         }
884
885         Free(outcomeProb);
886
887         if (do_gradient) {
888                 double *thrG = state->thrGradient + itemParam->rows * itemParam->cols * omx_absolute_thread_num();
889
890                 for (int ox=0; ox < itemParam->rows * itemParam->cols; ox++) {
891                         if (state->paramMap[ox] == -1) continue;
892 #if 0
893 #pragma omp critical(ba81Fit1OrdinateDebug2)
894                         if (!isfinite(gradient[ox])) {
895                                 int item = ox / itemParam->rows;
896                                 const double *spec = omxMatrixColumn(itemSpec, item);
897                                 int id = spec[RPF_ISpecID];
898                                 Rprintf("item spec:\n");
899                                 pda(spec, (*rpf_model[id].numSpec)(spec), 1);
900                                 Rprintf("item parameters:\n");
901                                 const double *iparam = omxMatrixColumn(itemParam, item);
902                                 pda(iparam, itemParam->rows, 1);
903                                 Rprintf("where:\n");
904                                 pda(where, maxDims, 1);
905                                 int outcomes = spec[RPF_ISpecOutcomes];
906                                 double weight[outcomes];
907                                 ba81Weight(oo, item, quad, outcomes, weight);
908                                 Rprintf("weight:\n");
909                                 pda(weight, outcomes, 1);
910                                 error("Gradient for item %d param %d is %f; are you missing a lbound/ubound?",
911                                       item, ox, gradient[ox]);
912                         }
913 #endif
914                         areaProduct(state, quad, maxDims, gradient+ox);
915                         thrG[ox] += gradient[ox];
916                 }
917         }
918
919         return thr_ll;
920 }
921
922 static double
923 ba81ComputeFit1(omxExpectation* oo, int want, double *gradient)
924 {
925         omxBA81State *state = (omxBA81State*) oo->argStruct;
926         omxState* currentState = oo->currentState;  // only used in #pragma omp
927         omxMatrix *customPrior = state->customPrior;
928         omxMatrix *itemParam = state->itemParam;
929         omxMatrix *itemSpec = state->itemSpec;
930         int numSpecific = state->numSpecific;
931         int maxDims = state->maxDims;
932
933         double ll = 0;
934         if (customPrior) {
935                 omxRecompute(customPrior);
936                 ll = customPrior->data[0];
937         } else {
938                 int numItems = itemSpec->cols;
939                 for (int ix=0; ix < numItems; ix++) {
940                         const double *spec = omxMatrixColumn(itemSpec, ix);
941                         int id = spec[RPF_ISpecID];
942                         double *iparam = omxMatrixColumn(itemParam, ix);
943                         ll += (*rpf_model[id].prior)(spec, iparam);
944                 }
945         }
946
947         if (!isfinite(ll)) {
948                 omxPrint(itemParam, "item param");
949                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
950         }
951
952         if (numSpecific == 0) {
953 #pragma omp parallel for num_threads(currentState->numThreads)
954                 for (long qx=0; qx < state->totalQuadPoints; qx++) {
955                         int quad[maxDims];
956                         decodeLocation(qx, maxDims, state->quadGridSize, quad);
957                         double thr_ll = ba81Fit1Ordinate(oo, quad, want);
958
959 #pragma omp atomic
960                         ll += thr_ll;
961                 }
962         } else {
963                 int sDim = state->maxDims-1;
964                 long *quadGridSize = state->quadGridSize;
965
966 #pragma omp parallel for num_threads(currentState->numThreads)
967                 for (long qx=0; qx < state->totalPrimaryPoints; qx++) {
968                         int quad[maxDims];
969                         decodeLocation(qx, maxDims, quadGridSize, quad);
970
971                         double thr_ll = 0;
972                         long specificPoints = quadGridSize[sDim];
973                         for (long sx=0; sx < specificPoints; sx++) {
974                                 quad[sDim] = sx;
975                                 thr_ll += ba81Fit1Ordinate(oo, quad, want);
976                         }
977 #pragma omp atomic
978                         ll += thr_ll;
979                 }
980         }
981
982         if (!customPrior && gradient) {
983                 double *thr0 = state->thrGradient;
984
985                 int numParams = itemParam->rows * itemParam->cols;
986                 for (int th=1; th < getNumThreads(oo); th++) {
987                         double *thrG = state->thrGradient + th * numParams;
988
989                         for (int ox=0; ox < numParams; ox++) {
990                                 if (state->paramMap[ox] == -1) continue;
991                                 thr0[ox] += thrG[ox];
992                         }
993                 }
994
995                 int numItems = itemParam->cols;
996                 for (int ix=0; ix < numItems; ix++) {
997                         const double *spec = omxMatrixColumn(itemSpec, ix);
998                         int id = spec[RPF_ISpecID];
999                         int *map = state->paramMap + itemParam->rows * ix;
1000                         int mask[itemParam->rows];
1001                         for (int mx=0; mx < itemParam->rows; mx++) {
1002                                 mask[mx] = (map[mx] >= 0)? mx : -1;
1003                         }
1004                         double *iparam = omxMatrixColumn(itemParam, ix);
1005                         (*rpf_model[id].gradient)(spec, iparam, mask, NULL, NULL,
1006                                                   thr0 + itemParam->rows * ix);
1007                 }
1008
1009                 for (int ox=0; ox < numParams; ox++) {
1010                         int to = state->paramMap[ox];
1011                         if (to == -1) continue;
1012
1013                         // Need to check because this can happen if
1014                         // lbounds/ubounds are not set appropriately.
1015                         if (!isfinite(thr0[ox])) {
1016                                 int item = ox / itemParam->rows;
1017                                 Rprintf("item parameters:\n");
1018                                 const double *spec = omxMatrixColumn(itemSpec, item);
1019                                 int id = spec[RPF_ISpecID];
1020                                 int numParam = (*rpf_model[id].numParam)(spec);
1021                                 double *iparam = omxMatrixColumn(itemParam, item);
1022                                 pda(iparam, numParam, 1);
1023                                 error("Gradient for item %d is %f; are you missing a lbound/ubound?",
1024                                       item, thr0[ox]);
1025                         }
1026
1027                         gradient[to] += -2 * thr0[ox];
1028                 }
1029         }
1030
1031         if (!isfinite(ll)) {
1032                 // This is a hack to avoid the need to specify
1033                 // ubound/lbound on parameters. Bounds are necessary
1034                 // mainly for debugging derivatives.
1035                 // Perhaps bounds can be pull in from librpf? TODO
1036                 return 2*state->ll;
1037         } else {
1038                 ll = -2 * ll;
1039                 state->ll = ll;
1040                 return ll;
1041         }
1042 }
1043
1044 double
1045 ba81ComputeFit(omxExpectation* oo, int want, double *gradient)
1046 {
1047         if (!want) return 0;
1048
1049         omxBA81State *state = (omxBA81State*) oo->argStruct;
1050         omxState* currentState = oo->currentState;
1051
1052         if (want & FF_COMPUTE_FIT) {
1053                 ++state->fitCount;
1054         }
1055
1056         if (want & FF_COMPUTE_GRADIENT) {
1057                 ++state->gradientCount;
1058
1059                 int numFreeParams = currentState->numFreeParams;
1060                 OMXZERO(gradient, numFreeParams);
1061
1062                 omxMatrix *itemParam = state->itemParam;
1063                 OMXZERO(state->thrGradient, itemParam->rows * itemParam->cols * getNumThreads(oo));
1064         }
1065
1066         double got = ba81ComputeFit1(oo, want, gradient);
1067         return got;
1068 }
1069
1070 static void
1071 ba81SetupQuadrature(omxExpectation* oo, int numPoints, double *points, double *area)
1072 {
1073         omxBA81State *state = (omxBA81State *) oo->argStruct;
1074         int numUnique = state->numUnique;
1075         int numThreads = getNumThreads(oo);
1076
1077         state->numQpoints = numPoints;
1078
1079         Free(state->Qpoint);
1080         Free(state->Qarea);
1081         state->Qpoint = Realloc(NULL, numPoints, double);
1082         state->Qarea = Realloc(NULL, numPoints, double);
1083         memcpy(state->Qpoint, points, sizeof(double)*numPoints);
1084         memcpy(state->Qarea, area, sizeof(double)*numPoints);
1085
1086         Free(state->logQarea);
1087
1088         state->logQarea = Realloc(NULL, state->numQpoints, double);
1089         for (int px=0; px < state->numQpoints; px++) {
1090                 state->logQarea[px] = log(state->Qarea[px]);
1091         }
1092
1093         state->totalQuadPoints = 1;
1094         state->totalPrimaryPoints = 1;
1095         state->quadGridSize = (long*) R_alloc(state->maxDims, sizeof(long));
1096         for (int dx=0; dx < state->maxDims; dx++) {
1097                 state->quadGridSize[dx] = state->numQpoints;
1098                 state->totalQuadPoints *= state->quadGridSize[dx];
1099                 if (dx < state->maxDims-1) {
1100                         state->totalPrimaryPoints *= state->quadGridSize[dx];
1101                 }
1102         }
1103
1104         Free(state->lxk);
1105
1106         if (!state->cacheLXK) {
1107                 state->lxk = Realloc(NULL, numUnique * numThreads, double);
1108         } else {
1109                 int ns = state->numSpecific;
1110                 if (ns == 0) ns = 1;
1111                 state->lxk = Realloc(NULL, numUnique * state->totalQuadPoints * ns, double);
1112         }
1113 }
1114
1115 static void
1116 ba81EAP1(omxExpectation *oo, double *workspace, long qx, int maxDims, int numUnique,
1117          double *ability, double *cov, double *spstats)
1118 {
1119         omxBA81State *state = (omxBA81State *) oo->argStruct;
1120         double *patternLik = state->patternLik;
1121         int quad[maxDims];
1122         decodeLocation(qx, maxDims, state->quadGridSize, quad);
1123         double where[maxDims];
1124         pointToWhere(state, quad, where, maxDims);
1125         double logArea = logAreaProduct(state, quad, maxDims);
1126         double *lxk = ba81LikelihoodFast(oo, 0, quad);
1127         double *myspace = workspace + 2 * maxDims * numUnique * omx_absolute_thread_num();
1128
1129         for (int px=0; px < numUnique; px++) {
1130                 double *piece = myspace + px * 2 * maxDims;
1131                 double plik = exp(lxk[px] + logArea - patternLik[px]);
1132                 for (int dx=0; dx < maxDims; dx++) {
1133                         piece[dx] = where[dx] * plik;
1134                 }
1135                 /*
1136                 for (int d1=0; d1 < maxDims; d1++) {
1137                         for (int d2=0; d2 <= d1; d2++) {
1138                                 covPiece[d1 * maxDims + d2] += piece[d1] * piece[d2];
1139                         }
1140                 }
1141                 */
1142         }
1143 #pragma omp critical(EAP1Update)
1144         for (int px=0; px < numUnique; px++) {
1145                 double *piece = myspace + px * 2 * maxDims;
1146                 double *arow = ability + px * 2 * maxDims;
1147                 for (int dx=0; dx < maxDims; dx++) {
1148                         arow[dx*2] += piece[dx];
1149                 }
1150                 /*
1151                 for (int d1=0; d1 < maxDims; d1++) {
1152                         for (int d2=0; d2 <= d1; d2++) {
1153                                 int loc = d1 * maxDims + d2;
1154                                 cov[loc] += covPiece[loc];
1155                         }
1156                 }
1157                 */
1158         }
1159 }
1160
1161 static void
1162 ba81EAP2(omxExpectation *oo, double *workspace, long qx, int maxDims, int numUnique,
1163          double *ability, double *spstats)
1164 {
1165         omxBA81State *state = (omxBA81State *) oo->argStruct;
1166         double *patternLik = state->patternLik;
1167         int quad[maxDims];
1168         decodeLocation(qx, maxDims, state->quadGridSize, quad);
1169         double where[maxDims];
1170         pointToWhere(state, quad, where, maxDims);
1171         double logArea = logAreaProduct(state, quad, maxDims);
1172         double *lxk = ba81LikelihoodFast(oo, 0, quad);
1173
1174         for (int px=0; px < numUnique; px++) {
1175                 double psd[maxDims];
1176                 double *arow = ability + px * 2 * maxDims;
1177                 for (int dx=0; dx < maxDims; dx++) {
1178                         // is this just sqrt(variance) and redundant with the covariance matrix? TODO
1179                         double ldiff = log(fabs(where[dx] - arow[dx*2]));
1180                         psd[dx] = exp(2 * ldiff + lxk[px] + logArea - patternLik[px]);
1181                 }
1182 #pragma omp critical(EAP1Update)
1183                 for (int dx=0; dx < maxDims; dx++) {
1184                         arow[dx*2+1] += psd[dx];
1185                 }
1186         }
1187 }
1188
1189 /**
1190  * MAP is not affected by the number of items. EAP is. Likelihood can
1191  * get concentrated in a single quadrature ordinate. For 3PL, response
1192  * patterns can have a bimodal likelihood. This will confuse MAP and
1193  * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
1194  *
1195  * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
1196  * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
1197  * (pp 73-140). Lawrence Erlbaum Associates, Inc.
1198  */
1199 omxRListElement *
1200 ba81EAP(omxExpectation *oo, int *numReturns)
1201 {
1202         omxBA81State *state = (omxBA81State *) oo->argStruct;
1203         state->converged = 1;
1204         int maxDims = state->maxDims;
1205         //int numSpecific = state->numSpecific;
1206
1207         *numReturns = 4; // + (maxDims > 1) + (numSpecific > 1);
1208         omxRListElement *out = (omxRListElement*) R_alloc(*numReturns, sizeof(omxRListElement));
1209         int ox=0;
1210
1211         out[ox].numValues = 1;
1212         out[ox].values = (double*) R_alloc(1, sizeof(double));
1213         strcpy(out[ox].label, "Minus2LogLikelihood");
1214         out[ox].values[0] = state->ll;
1215         ++ox;
1216
1217         out[ox].numValues = -1;
1218         strcpy(out[ox].label, "latent.cov");
1219         out[ox].rows = state->maxAbilities;
1220         out[ox].cols = state->maxAbilities;
1221         out[ox].values = state->latentCovOut;
1222         ++ox;
1223
1224         out[ox].numValues = state->maxAbilities;
1225         strcpy(out[ox].label, "latent.mean");
1226         out[ox].values = state->latentMeanOut;
1227         ++ox;
1228
1229         omxData *data = state->data;
1230         int numUnique = state->numUnique;
1231
1232         // TODO Wainer & Thissen. (1987). Estimating ability with the wrong
1233         // model. Journal of Educational Statistics, 12, 339-368.
1234
1235         int numQpoints = state->numQpoints * 2;  // make configurable TODO
1236
1237         if (numQpoints < 1 + 2.0 * sqrt(state->itemSpec->cols)) {
1238                 // Thissen & Orlando (2001, p. 136)
1239                 warning("EAP requires at least 2*sqrt(items) quadrature points");
1240         }
1241
1242         double Qpoint[numQpoints];
1243         double Qarea[numQpoints];
1244         const double Qwidth = 4;
1245         for (int qx=0; qx < numQpoints; qx++) {
1246                 Qpoint[qx] = Qwidth - qx * Qwidth*2 / (numQpoints-1);
1247                 Qarea[qx] = 1.0/numQpoints;
1248         }
1249         ba81SetupQuadrature(oo, numQpoints, Qpoint, Qarea);
1250         ba81Estep1(oo);   // recalc patternLik with a flat prior
1251
1252         double *cov = NULL;
1253         /*
1254         if (maxDims > 1) {
1255                 strcpy(out[2].label, "ability.cov");
1256                 out[2].numValues = -1;
1257                 out[2].rows = maxDims;
1258                 out[2].cols = maxDims;
1259                 out[2].values = (double*) R_alloc(out[2].rows * out[2].cols, sizeof(double));
1260                 cov = out[2].values;
1261                 OMXZERO(cov, out[2].rows * out[2].cols);
1262         }
1263         */
1264         double *spstats = NULL;
1265         /*
1266         if (numSpecific) {
1267                 strcpy(out[3].label, "specific");
1268                 out[3].numValues = -1;
1269                 out[3].rows = numSpecific;
1270                 out[3].cols = 2;
1271                 out[3].values = (double*) R_alloc(out[3].rows * out[3].cols, sizeof(double));
1272                 spstats = out[3].values;
1273         }
1274         */
1275
1276         // allocation of workspace could be optional
1277         int numThreads = getNumThreads(oo);
1278         double *workspace = Realloc(NULL, numUnique * maxDims * 2 * numThreads, double);
1279
1280         // Need a separate work space because the destination needs
1281         // to be in unsorted order with duplicated rows.
1282         double *ability = Calloc(numUnique * maxDims * 2, double);
1283
1284 #pragma omp parallel for num_threads(oo->currentState->numThreads)
1285         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1286                 ba81EAP1(oo, workspace, qx, maxDims, numUnique, ability, cov, spstats);
1287         }
1288
1289         /*
1290         // make symmetric
1291         for (int d1=0; d1 < maxDims; d1++) {
1292                 for (int d2=0; d2 < d1; d2++) {
1293                         cov[d2 * maxDims + d1] = cov[d1 * maxDims + d2];
1294                 }
1295         }
1296         */
1297
1298 #pragma omp parallel for num_threads(oo->currentState->numThreads)
1299         for (long qx=0; qx < state->totalQuadPoints; qx++) {
1300                 ba81EAP2(oo, workspace, qx, maxDims, numUnique, ability, spstats);
1301         }
1302
1303         for (int px=0; px < numUnique; px++) {
1304                 double *arow = ability + px * 2 * maxDims;
1305                 for (int dx=0; dx < maxDims; dx++) {
1306                         arow[dx*2+1] = sqrt(arow[dx*2+1]);
1307                 }
1308         }
1309
1310         strcpy(out[ox].label, "ability");
1311         out[ox].numValues = -1;
1312         out[ox].rows = data->rows;
1313         out[ox].cols = 2 * maxDims;
1314         out[ox].values = (double*) R_alloc(out[ox].rows * out[ox].cols, sizeof(double));
1315
1316         for (int rx=0; rx < numUnique; rx++) {
1317                 double *pa = ability + rx * 2 * maxDims;
1318
1319                 int dups = omxDataNumIdenticalRows(state->data, state->rowMap[rx]);
1320                 for (int dup=0; dup < dups; dup++) {
1321                         int dest = omxDataIndex(data, state->rowMap[rx]+dup);
1322                         int col=0;
1323                         for (int dx=0; dx < maxDims; dx++) {
1324                                 out[ox].values[col * out[ox].rows + dest] = pa[col]; ++col;
1325                                 out[ox].values[col * out[ox].rows + dest] = pa[col]; ++col;
1326                         }
1327                 }
1328         }
1329         Free(ability);
1330         Free(workspace);
1331         ++ox;
1332
1333         for (int ix=0; ix < state->itemParam->cols; ix++) {
1334                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1335                 int id = spec[RPF_ISpecID];
1336                 double *param = omxMatrixColumn(state->itemParam, ix);
1337                 rpf_model[id].postfit(spec, param);
1338         }
1339
1340         return out;
1341 }
1342
1343 static void ba81Destroy(omxExpectation *oo) {
1344         if(OMX_DEBUG) {
1345                 Rprintf("Freeing %s function.\n", NAME);
1346         }
1347         omxBA81State *state = (omxBA81State *) oo->argStruct;
1348         Rprintf("fit %d gradient %d\n", state->fitCount, state->gradientCount);
1349         omxFreeAllMatrixData(state->itemSpec);
1350         omxFreeAllMatrixData(state->itemParam);
1351         omxFreeAllMatrixData(state->EitemParam);
1352         omxFreeAllMatrixData(state->design);
1353         omxFreeAllMatrixData(state->customPrior);
1354         Free(state->logNumIdentical);
1355         Free(state->numIdentical);
1356         Free(state->Qpoint);
1357         Free(state->Qarea);
1358         Free(state->logQarea);
1359         Free(state->rowMap);
1360         Free(state->patternLik);
1361         Free(state->lxk);
1362         Free(state->Slxk);
1363         Free(state->allSlxk);
1364         Free(state->Sgroup);
1365         Free(state->paramMap);
1366         Free(state->thrGradient);
1367         Free(state->thrGradient1);
1368         Free(state->latentMean);
1369         Free(state->latentCov);
1370         Free(state->latentMean1);
1371         Free(state->latentCov1);
1372         Free(state->latentMeanOut);
1373         Free(state->latentCovOut);
1374         Free(state);
1375 }
1376
1377 void omxInitExpectationBA81(omxExpectation* oo) {
1378         omxState* currentState = oo->currentState;      
1379         SEXP rObj = oo->rObj;
1380         SEXP tmp;
1381         
1382         if(OMX_DEBUG) {
1383                 Rprintf("Initializing %s.\n", NAME);
1384         }
1385         if (!rpf_model) {
1386                 const int wantVersion = 3;
1387                 int version;
1388                 get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model");
1389                 (*get_librpf)(&version, &rpf_numModels, &rpf_model);
1390                 if (version < wantVersion) error("librpf binary API %d installed, at least %d is required",
1391                                                  version, wantVersion);
1392         }
1393         
1394         omxBA81State *state = Calloc(1, omxBA81State);
1395         oo->argStruct = (void*) state;
1396
1397         state->ll = 1e20;   // finite but big
1398         
1399         PROTECT(tmp = GET_SLOT(rObj, install("data")));
1400         state->data = omxNewDataFromMxDataPtr(tmp, currentState);
1401         UNPROTECT(1);
1402
1403         if (strcmp(omxDataType(state->data), "raw") != 0) {
1404                 omxRaiseErrorf(currentState, "%s unable to handle data type %s", NAME, omxDataType(state->data));
1405                 return;
1406         }
1407
1408         PROTECT(state->rpf = GET_SLOT(rObj, install("RPF")));
1409         if (state->rpf == R_NilValue) {
1410                 state->computeRPF = standardComputeRPF;
1411         } else {
1412                 state->computeRPF = RComputeRPF;
1413         }
1414
1415         state->itemSpec =
1416                 omxNewMatrixFromIndexSlot(rObj, currentState, "ItemSpec");
1417         state->design =
1418                 omxNewMatrixFromIndexSlot(rObj, currentState, "Design");
1419         state->itemParam =
1420                 omxNewMatrixFromIndexSlot(rObj, currentState, "ItemParam");
1421         state->EitemParam =
1422                 omxInitTemporaryMatrix(NULL, state->itemParam->rows, state->itemParam->cols,
1423                                        TRUE, currentState);
1424         state->customPrior =
1425                 omxNewMatrixFromIndexSlot(rObj, currentState, "CustomPrior");
1426         
1427         oo->computeFun = ba81Estep;
1428         oo->destructFun = ba81Destroy;
1429         
1430         // TODO: Exactly identical rows do not contribute any information.
1431         // The sorting algorithm ought to remove them so we don't waste RAM.
1432         // The following summary stats would be cheaper to calculate too.
1433
1434         int numUnique = 0;
1435         omxData *data = state->data;
1436         if (omxDataNumFactor(data) != data->cols) {
1437                 // verify they are ordered factors TODO
1438                 omxRaiseErrorf(currentState, "%s: all columns must be factors", NAME);
1439                 return;
1440         }
1441
1442         for (int rx=0; rx < data->rows;) {
1443                 rx += omxDataNumIdenticalRows(state->data, rx);
1444                 ++numUnique;
1445         }
1446         state->numUnique = numUnique;
1447
1448         state->rowMap = Realloc(NULL, numUnique, int);
1449         state->numIdentical = Realloc(NULL, numUnique, int);
1450         state->logNumIdentical = Realloc(NULL, numUnique, double);
1451
1452         int numItems = state->itemParam->cols;
1453
1454         for (int rx=0, ux=0; rx < data->rows; ux++) {
1455                 if (rx == 0) {
1456                         // all NA rows will sort to the top
1457                         int na=0;
1458                         for (int ix=0; ix < numItems; ix++) {
1459                                 if (omxIntDataElement(data, 0, ix) == NA_INTEGER) { ++na; }
1460                         }
1461                         if (na == numItems) {
1462                                 omxRaiseErrorf(currentState, "Remove rows with all NAs");
1463                                 return;
1464                         }
1465                 }
1466                 int dups = omxDataNumIdenticalRows(state->data, rx);
1467                 state->numIdentical[ux] = dups;
1468                 state->logNumIdentical[ux] = log(dups);
1469                 state->rowMap[ux] = rx;
1470                 rx += dups;
1471         }
1472
1473         state->patternLik = Realloc(NULL, numUnique, double);
1474
1475         int numThreads = getNumThreads(oo);
1476
1477         int maxSpec = 0;
1478         int maxParam = 0;
1479         state->maxDims = 0;
1480         state->maxOutcomes = 0;
1481
1482         for (int ix=0; ix < numItems; ix++) {
1483                 double *spec = omxMatrixColumn(state->itemSpec, ix);
1484                 int id = spec[RPF_ISpecID];
1485                 if (id < 0 || id >= rpf_numModels) {
1486                         omxRaiseErrorf(currentState, "ItemSpec column %d has unknown item model %d", ix, id);
1487                         return;
1488                 }
1489
1490                 double *param = omxMatrixColumn(state->itemParam, ix);
1491                 rpf_model[id].prefit(spec, param);
1492         }
1493
1494         for (int cx = 0; cx < data->cols; cx++) {
1495                 const double *spec = omxMatrixColumn(state->itemSpec, cx);
1496                 int id = spec[RPF_ISpecID];
1497                 int dims = spec[RPF_ISpecDims];
1498                 if (state->maxDims < dims)
1499                         state->maxDims = dims;
1500
1501                 // TODO verify that item model can have requested number of outcomes
1502                 int no = spec[RPF_ISpecOutcomes];
1503                 if (state->maxOutcomes < no)
1504                         state->maxOutcomes = no;
1505
1506                 // TODO this summary stat should be available from omxData
1507                 int dataMax=0;
1508                 for (int rx=0; rx < data->rows; rx++) {
1509                         int pick = omxIntDataElementUnsafe(data, rx, cx);
1510                         if (dataMax < pick)
1511                                 dataMax = pick;
1512                 }
1513                 if (dataMax > no) {
1514                         error("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
1515                 } else if (dataMax < no) {
1516                         warning("Data for item %d has only %d outcomes, not %d", cx+1, dataMax, no);
1517                 }
1518
1519                 int numSpec = (*rpf_model[id].numSpec)(spec);
1520                 if (maxSpec < numSpec)
1521                         maxSpec = numSpec;
1522
1523                 int numParam = (*rpf_model[id].numParam)(spec);
1524                 if (maxParam < numParam)
1525                         maxParam = numParam;
1526         }
1527
1528         if (state->itemSpec->cols != data->cols || state->itemSpec->rows != maxSpec) {
1529                 omxRaiseErrorf(currentState, "ItemSpec must have %d item columns and %d rows",
1530                                data->cols, maxSpec);
1531                 return;
1532         }
1533         if (state->itemParam->rows != maxParam) {
1534                 omxRaiseErrorf(currentState, "ItemParam should have %d rows", maxParam);
1535                 return;
1536         }
1537
1538         if (state->design == NULL) {
1539                 state->maxAbilities = state->maxDims;
1540                 state->design = omxInitTemporaryMatrix(NULL, state->maxDims, numItems,
1541                                        TRUE, currentState);
1542                 for (int ix=0; ix < numItems; ix++) {
1543                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1544                         int dims = spec[RPF_ISpecDims];
1545                         for (int dx=0; dx < state->maxDims; dx++) {
1546                                 omxSetMatrixElement(state->design, dx, ix, dx < dims? (double)dx+1 : nan(""));
1547                         }
1548                 }
1549         } else {
1550                 omxMatrix *design = state->design;
1551                 if (design->cols != numItems ||
1552                     design->rows != state->maxDims) {
1553                         omxRaiseErrorf(currentState, "Design matrix should have %d rows and %d columns",
1554                                        state->maxDims, numItems);
1555                         return;
1556                 }
1557
1558                 state->maxAbilities = 0;
1559                 for (int ix=0; ix < design->rows * design->cols; ix++) {
1560                         double got = design->data[ix];
1561                         if (!R_FINITE(got)) continue;
1562                         if (round(got) != (int)got) error("Design matrix can only contain integers"); // TODO better way?
1563                         if (state->maxAbilities < got)
1564                                 state->maxAbilities = got;
1565                 }
1566                 for (int ix=0; ix < design->cols; ix++) {
1567                         const double *idesign = omxMatrixColumn(design, ix);
1568                         int ddim = 0;
1569                         for (int rx=0; rx < design->rows; rx++) {
1570                                 if (isfinite(idesign[rx])) ddim += 1;
1571                         }
1572                         const double *spec = omxMatrixColumn(state->itemSpec, ix);
1573                         int dims = spec[RPF_ISpecDims];
1574                         if (ddim > dims) error("Item %d has %d dims but design assigns %d", ix, dims, ddim);
1575                 }
1576         }
1577         if (state->maxAbilities <= state->maxDims) {
1578                 state->Sgroup = Calloc(numItems, int);
1579         } else {
1580                 // Not sure if this is correct, revisit TODO
1581                 int Sgroup0 = -1;
1582                 state->Sgroup = Realloc(NULL, numItems, int);
1583                 for (int dx=0; dx < state->maxDims; dx++) {
1584                         for (int ix=0; ix < numItems; ix++) {
1585                                 int ability = omxMatrixElement(state->design, dx, ix);
1586                                 if (dx < state->maxDims - 1) {
1587                                         if (Sgroup0 <= ability)
1588                                                 Sgroup0 = ability+1;
1589                                         continue;
1590                                 }
1591                                 int ss=-1;
1592                                 if (ability >= Sgroup0) {
1593                                         if (ss == -1) {
1594                                                 ss = ability;
1595                                         } else {
1596                                                 omxRaiseErrorf(currentState, "Item %d cannot belong to more than "
1597                                                                "1 specific dimension (both %d and %d)",
1598                                                                ix, ss, ability);
1599                                                 return;
1600                                         }
1601                                 }
1602                                 if (ss == -1) ss = Sgroup0;
1603                                 state->Sgroup[ix] = ss - Sgroup0;
1604                         }
1605                 }
1606                 state->numSpecific = state->maxAbilities - state->maxDims + 1;
1607                 state->allSlxk = Realloc(NULL, numUnique * numThreads, double);
1608                 state->Slxk = Realloc(NULL, numUnique * state->numSpecific * numThreads, double);
1609         }
1610
1611         PROTECT(tmp = GET_SLOT(rObj, install("doRescale")));
1612         state->doRescale = asLogical(tmp);
1613
1614         PROTECT(tmp = GET_SLOT(rObj, install("cache")));
1615         state->cacheLXK = asLogical(tmp);
1616
1617         PROTECT(tmp = GET_SLOT(rObj, install("GHpoints")));
1618         double *qpoints = REAL(tmp);
1619         int numQPoints = length(tmp);
1620
1621         PROTECT(tmp = GET_SLOT(rObj, install("GHarea")));
1622         double *qarea = REAL(tmp);
1623         if (numQPoints != length(tmp)) error("length(GHpoints) != length(GHarea)");
1624
1625         ba81SetupQuadrature(oo, numQPoints, qpoints, qarea);
1626
1627         state->latentMean = Realloc(NULL, state->maxAbilities * numUnique, double);
1628         state->latentCov = Realloc(NULL, state->maxAbilities * state->maxAbilities * numUnique, double);
1629         state->latentMean1 = Realloc(NULL, state->maxDims, double);
1630         state->latentCov1 = Realloc(NULL, state->maxDims * state->maxDims, double);
1631         state->latentMeanOut = Realloc(NULL, state->maxAbilities, double);
1632         state->latentCovOut = Realloc(NULL, state->maxAbilities * state->maxAbilities, double);
1633
1634         buildParamMap(oo);
1635
1636         // verify data bounded between 1 and numOutcomes TODO
1637         // hm, looks like something could be added to omxData for column summary stats?
1638 }