Fixed a bug where could not be accessed from a model object, and adjusted one test...
[openmx:openmx.git] / src / omxExpectationBA81.cpp
/*
  Copyright 2012-2014 Joshua Nathaniel Pritikin and contributors

  This is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <limits>
#include <Rmath.h>

#include "omxExpectationBA81.h"
#include "glue.h"
#include "libifa-rpf.h"
#include "dmvnorm.h"
#include "omxBuffer.h"
#include "matrix.h"

const struct rpf *rpf_model = NULL;
int rpf_numModels;

void pda(const double *ar, int rows, int cols)
{
        if (rows == 0 || cols == 0) return;
        std::string buf;
        for (int rx=0; rx < rows; rx++) {   // column major order
                for (int cx=0; cx < cols; cx++) {
                        buf += string_snprintf("%.6g, ", ar[cx * rows + rx]);
                }
                buf += "\n";
        }
        mxLogBig(buf);
}

void pia(const int *ar, int rows, int cols)
{
        if (rows == 0 || cols == 0) return;
        std::string buf;
        for (int rx=0; rx < rows; rx++) {   // column major order
                for (int cx=0; cx < cols; cx++) {
                        buf += string_snprintf("%d, ", ar[cx * rows + rx]);
                }
                buf += "\n";
        }
        mxLogBig(buf);
}

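// Roughly: on entry, Qweight[qx] carries this row's likelihood mass at
// quadrature point qx, so it sums to patternLik1 over the weight table.
// Scaling by rowWeight/patternLik1 turns it into posterior weights that
// sum to the row's frequency weight.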
template <typename T>
void BA81LatentFixed<T>::normalizeWeights(class ifaGroup *grp, T extraData,
                                          int px, double *Qweight, double patternLik1, int thrId)
{
        double weight = grp->rowWeight[px] / patternLik1;
        int pts = grp->quad.weightTableSize;
        for (int qx=0; qx < pts; ++qx) {
                Qweight[qx] *= weight;
        }
}

template <typename T>
void BA81LatentSummary<T>::begin(class ifaGroup *state, T extraData)
{
        extraData->thrDweight.assign(state->quad.weightTableSize * Global->numThreads, 0.0);
}

template <typename T>
void BA81LatentSummary<T>::normalizeWeights(class ifaGroup *grp, T extraData,
                                            int px, double *Qweight, double patternLik1, int thrId)
{
        double weight = grp->rowWeight[px] / patternLik1;
        int pts = grp->quad.weightTableSize;
        double *Dweight = extraData->thrDweight.data() + pts * thrId;
        for (int qx=0; qx < pts; ++qx) {
                double tmp = Qweight[qx] * weight;
                Dweight[qx] += tmp;
                Qweight[qx] = tmp;
        }
}

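// latentDist1 is packed as the maxAbilities means followed by the lower
// triangle of the covariance (triangleLoc1/triangleLoc0 give the offsets);
// specific dimensions beyond primaryDims export only a diagonal variance.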
static void exportLatentDistToOMX(ba81NormalQuad &quad, double *latentDist1, omxMatrix *meanOut, omxMatrix *covOut)
{
        const int maxAbilities = quad.maxAbilities;
        const int primaryDims = quad.primaryDims;

        if (meanOut) {
                for (int d1=0; d1 < maxAbilities; d1++) {
                        omxSetVectorElement(meanOut, d1, latentDist1[d1]);
                }
        }

        if (covOut) {
                for (int d1=0; d1 < primaryDims; d1++) {
                        int cx = maxAbilities + triangleLoc1(d1);
                        for (int d2=0; d2 <= d1; d2++) {
                                double cov = latentDist1[cx];
                                omxSetMatrixElement(covOut, d1, d2, cov);
                                if (d1 != d2) omxSetMatrixElement(covOut, d2, d1, cov);
                                ++cx;
                        }
                }
                for (int d1=primaryDims; d1 < maxAbilities; d1++) {
                        int loc = maxAbilities + triangleLoc0(d1);
                        omxSetMatrixElement(covOut, d1, d1, latentDist1[loc]);
                }
        }
}

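// Reduce the per-thread Dweight tables into thread 0's slice, then let the
// quadrature object turn the pooled weights into EAP latent moments
// (scaled by 1/weightSum) and copy them into estLatentMean/estLatentCov.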
template <typename T>
void BA81LatentSummary<T>::end(class ifaGroup *grp, T extraData)
{
        int pts = grp->quad.weightTableSize;
        double *thrDweight = extraData->thrDweight.data();

        for (int tx=1; tx < Global->numThreads; ++tx) {
                double *Dweight = thrDweight + pts * tx;
                double *dest = thrDweight;
                for (int qx=0; qx < pts; ++qx) {
                        dest[qx] += Dweight[qx];
                }
        }

        // could shrink thrDweight since thr=0 has all the info

        ba81NormalQuad &quad = grp->quad;
        int numLatents = quad.maxAbilities + triangleLoc1(quad.maxAbilities);
        std::vector<double> latentDist;
        latentDist.assign(numLatents, 0.0);
        quad.EAP(thrDweight, 1/extraData->weightSum, latentDist.data());
        exportLatentDistToOMX(quad, latentDist.data(), extraData->estLatentMean, extraData->estLatentCov);

        ++extraData->ElatentVersion;
}

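// Multiple-group case: pool the quadrature weight tables of several BA81
// expectations into one latent mean/cov. The summed ElatentVersion acts as
// a cheap change detector so the pooling is skipped when nothing moved.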
void ba81AggregateDistributions(std::vector<struct omxExpectation *> &expectation,
                                int *version, omxMatrix *meanMat, omxMatrix *covMat)
{
        BA81Expect *exemplar = (BA81Expect *) expectation[0]->argStruct;
        ba81NormalQuad &quad = exemplar->getQuad();
        int pts = quad.weightTableSize;
        Eigen::ArrayXd dist(pts);
        dist.setZero();

        int allVer = 0;
        for (size_t ex=0; ex < expectation.size(); ++ex) {
                BA81Expect *ba81 = (BA81Expect *) expectation[ex]->argStruct;
                allVer += ba81->ElatentVersion;
        }
        if (*version == allVer) return;
        *version = allVer;

        int got = 0;
        for (size_t ex=0; ex < expectation.size(); ++ex) {
                BA81Expect *ba81 = (BA81Expect *) expectation[ex]->argStruct;
                if (ba81->thrDweight.size() == 0) continue;
                double weight = 1/ba81->weightSum;
                for (int qx = 0; qx < pts; ++qx) {
                        dist[qx] += weight * ba81->thrDweight[qx];
                }
                ++got;
        }
        if (got == 0) return;

        if (got != (int) expectation.size()) {
                // maybe OK?
                Rf_error("ba81AggregateDistributions: %d/%d expectations ready",
                         got, (int) expectation.size());
        }

        int numLatents = quad.maxAbilities + triangleLoc1(quad.maxAbilities);
        std::vector<double> latentDist;
        latentDist.assign(numLatents, 0.0);
        quad.EAP(dist.data(), 1.0/got, latentDist.data());
        exportLatentDistToOMX(quad, latentDist.data(), meanMat, covMat);
}

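// Each thread gets its own zeroed expected-count table of
// totalOutcomes * totalQuadPoints doubles; recordTable() folds them together.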
template <typename T, typename CovType>
void BA81Estep<T, CovType>::begin(ifaGroup *state, T extraData)
{
        ba81NormalQuad &quad = state->quad;
        thrExpected.assign(state->totalOutcomes * quad.totalQuadPoints * Global->numThreads, 0.0);
}

template <typename T, typename CovType>
void BA81Estep<T, CovType>::addRow(class ifaGroup *state, T extraData, int px, double *Qweight, int thrId)
{
        double *out = thrExpected.data() + thrId * state->totalOutcomes * state->quad.totalQuadPoints;
        BA81EstepBase<CovType>::addRow1(state, px, Qweight, out);
}

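// Layout sketch for the dense case: the expected table is grouped by item;
// within item ix it holds totalQuadPoints blocks of itemOutcomes[ix] cells,
// so the cell for (item ix, quad point qx, outcome k) sits at
//   offset(ix) + qx * itemOutcomes[ix] + k
// where offset(ix) = sum over j < ix of itemOutcomes[j] * totalQuadPoints.
// Only the observed outcome (pick) receives the row's posterior weight.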
template<>
void BA81EstepBase<BA81Dense>::addRow1(class ifaGroup *grp, int px, double *Qweight, double *out)
{
        std::vector<int> &rowMap = grp->rowMap;
        std::vector<int> &itemOutcomes = grp->itemOutcomes;
        ba81NormalQuad &quad = grp->getQuad();
        const int totalQuadPoints = quad.totalQuadPoints;

        for (int ix=0; ix < grp->numItems(); ++ix) {
                int pick = grp->dataColumns[ix][rowMap[px]];
                if (pick == NA_INTEGER) {
                        out += itemOutcomes[ix] * totalQuadPoints;
                        continue;
                }
                pick -= 1;

                for (int qx=0; qx < totalQuadPoints; ++qx) {
                        out[pick] += Qweight[qx];
                        out += itemOutcomes[ix];
                }
        }
}

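// Two-tier variant: Qweight holds numSpecific entries per quadrature point
// (one per specific-factor group), so each item draws the weight belonging
// to its own Sgroup while stepping through the same table layout as above.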
template<>
void BA81EstepBase<BA81TwoTier>::addRow1(class ifaGroup *grp, int px, double *Qweight, double *out)
{
        std::vector<int> &rowMap = grp->rowMap;
        std::vector<int> &itemOutcomes = grp->itemOutcomes;
        ba81NormalQuad &quad = grp->getQuad();
        const int numSpecific = quad.numSpecific;
        const int totalQuadPoints = quad.totalQuadPoints;

        for (int ix=0; ix < grp->numItems(); ++ix) {
                int pick = grp->dataColumns[ix][rowMap[px]];
                if (pick == NA_INTEGER) {
                        out += itemOutcomes[ix] * totalQuadPoints;
                        continue;
                }
                pick -= 1;

                int Sgroup = grp->Sgroup[ix];
                double *Qw = Qweight;
                for (int qx=0; qx < totalQuadPoints; ++qx) {
                        out[pick] += Qw[Sgroup];
                        out += itemOutcomes[ix];
                        Qw += numSpecific;
                }
        }
}

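// Fold the per-thread expected tables into extraData->expected: thread 0's
// table is copied after the Realloc and the remaining threads are added
// element-wise.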
template <typename T, typename CovType>
void BA81Estep<T, CovType>::recordTable(class ifaGroup *state, T extraData)
{
        const int numThreads = Global->numThreads;
        ba81NormalQuad &quad = state->getQuad();
        const int expectedSize = quad.totalQuadPoints * state->totalOutcomes;
        double *e1 = thrExpected.data();

        extraData->expected = Realloc(extraData->expected, state->totalOutcomes * quad.totalQuadPoints, double);
        memcpy(extraData->expected, e1, sizeof(double) * expectedSize);
        e1 += expectedSize;

        for (int tx=1; tx < numThreads; ++tx) {
                for (int ex=0; ex < expectedSize; ++ex) {
                        extraData->expected[ex] += *e1;
                        ++e1;
                }
        }
}

static int getLatentVersion(BA81Expect *state)
{
        int vv = 1;  // to ensure it doesn't match on the first test
        if (state->_latentMeanOut) vv += omxGetMatrixVersion(state->_latentMeanOut);
        if (state->_latentCovOut) vv += omxGetMatrixVersion(state->_latentCovOut);
        return vv;
}

// Attempt G-H grid? http://dbarajassolano.wordpress.com/2012/01/26/on-sparse-grid-quadratures/
void ba81SetupQuadrature(omxExpectation* oo)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        ba81NormalQuad &quad = state->getQuad();
        bool latentClean = state->latentParamVersion == getLatentVersion(state);
        if (quad.Qpoint.size() == 0 && latentClean) return;

        int maxAbilities = state->grp.maxAbilities;
        if (maxAbilities == 0) {
                quad.setup0();
                state->latentParamVersion = getLatentVersion(state);
                return;
        }

        Eigen::VectorXd mean;
        Eigen::MatrixXd fullCov;
        state->getLatentDistribution(NULL, mean, fullCov);

        if (state->verbose >= 1) {
                mxLog("%s: set up quadrature", oo->name);
                if (state->verbose >= 2) {
                        pda(mean.data(), 1, maxAbilities);
                        pda(fullCov.data(), maxAbilities, maxAbilities);
                }
        }

        int numSpecific = state->grp.numSpecific;
        int priDims = maxAbilities - state->grp.numSpecific;
        Eigen::MatrixXd cov = fullCov.topLeftCorner(priDims, priDims);
        Eigen::VectorXd sVar(numSpecific);

        // This is required because the EM acceleration can push the
        // covariance matrix to be slightly non-pd when predictors
        // are highly correlated.
        if (priDims == 1) {
                if (cov(0,0) < BA81_MIN_VARIANCE) cov(0,0) = BA81_MIN_VARIANCE;
        } else {
                Matrix mat(cov.data(), priDims, priDims);
                InplaceForcePosSemiDef(mat, NULL, NULL);
        }

        for (int sx=0; sx < numSpecific; ++sx) {
                int loc = priDims + sx;
                double tmp = fullCov(loc, loc);
                if (tmp < BA81_MIN_VARIANCE) tmp = BA81_MIN_VARIANCE;
                sVar(sx) = tmp;
        }

        quad.setup(state->grp.qwidth, state->grp.qpoints, mean.data(), cov, sVar);

        state->latentParamVersion = getLatentVersion(state);
}

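// The BA81Engine policies used here and in ba81compute: BA81Dense/BA81TwoTier
// select the covariance structure, BA81LatentSummary/BA81LatentFixed decide
// whether the latent moments get re-estimated, and BA81OmitEstep skips the
// expected-table accumulation so only the pattern likelihoods are refreshed.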
void refreshPatternLikelihood(BA81Expect *state, bool hasFreeLatent)
{
        ba81NormalQuad &quad = state->getQuad();

        if (hasFreeLatent) {
                if (quad.numSpecific == 0) {
                        BA81Engine<typeof(state), BA81Dense, BA81LatentSummary, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                } else {
                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentSummary, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                }
        } else {
                if (quad.numSpecific == 0) {
                        BA81Engine<typeof(state), BA81Dense, BA81LatentFixed, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                } else {
                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentFixed, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                }
        }
}

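// Main compute entry point. A non-NULL 'what' handles side requests (copying
// the estimated latent distribution back, or switching the expectation
// between augmented and observed mode); otherwise the quadrature and the
// E-step are redone only when the relevant parameter versions have changed.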
static void
ba81compute(omxExpectation *oo, const char *what, const char *how)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;

        if (what) {
                if (strcmp(what, "latentDistribution")==0 && how && strcmp(how, "copy")==0) {
                        omxCopyMatrix(state->_latentMeanOut, state->estLatentMean);
                        omxCopyMatrix(state->_latentCovOut, state->estLatentCov);
                        return;
                }

                if (strcmp(what, "scores")==0) {
                        state->expectedUsed = true;
                        state->type = EXPECTATION_AUGMENTED;
                } else if (strcmp(what, "nothing")==0) {
                        state->type = EXPECTATION_OBSERVED;
                } else {
                        omxRaiseErrorf("%s: don't know how to predict '%s'",
                                       oo->name, what);
                }

                if (state->verbose >= 1) {
                        mxLog("%s: predict %s", oo->name, what);
                }
                return;
        }

        bool latentClean = state->latentParamVersion == getLatentVersion(state);
        bool itemClean = state->itemParamVersion == omxGetMatrixVersion(state->itemParam) && latentClean;

        ba81NormalQuad &quad = state->getQuad();

        if (state->verbose >= 1) {
                mxLog("%s: Qinit %d itemClean %d latentClean %d (1=clean) expectedUsed=%d",
                      oo->name, quad.Qpoint.size() != 0, itemClean, latentClean, state->expectedUsed);
        }

        if (!latentClean) ba81SetupQuadrature(oo);

        if (!itemClean) {
                double *param = state->EitemParam? state->EitemParam : state->itemParam->data;
                state->grp.ba81OutcomeProb(param, FALSE);

                bool estep = state->expectedUsed;
                if (estep) {
                        if (quad.numSpecific == 0) {
                                if (oo->dynamicDataSource) {
                                        BA81Engine<typeof(state), BA81Dense, BA81LatentSummary, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                } else {
                                        BA81Engine<typeof(state), BA81Dense, BA81LatentFixed, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                }
                        } else {
                                if (oo->dynamicDataSource) {
                                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentSummary, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                } else {
                                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentFixed, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                }
                        }
                } else {
                        Free(state->expected);
                        refreshPatternLikelihood(state, oo->dynamicDataSource);
                }
                if (oo->dynamicDataSource && state->verbose >= 2) {
                        mxLog("%s: empirical distribution:", state->name);
                        omxPrint(state->estLatentMean, "mean");
                        omxPrint(state->estLatentCov, "cov");
                }
                if (state->verbose >= 1) {
                        const int numUnique = state->getNumUnique();
                        mxLog("%s: estep<%s, %s, %s> %d/%d rows excluded",
                              state->name,
                              (quad.numSpecific == 0? "dense":"twotier"),
                              (estep && oo->dynamicDataSource? "summary":"fixed"),
                              (estep? "estep":"omitEstep"),
                              state->grp.excludedPatterns, numUnique);
                }
        }

        state->itemParamVersion = omxGetMatrixVersion(state->itemParam);
}

/**
 * MAP is not affected by the number of items. EAP is. Likelihood can
 * get concentrated in a single quadrature ordinate. For 3PL, response
 * patterns can have a bimodal likelihood. This will confuse MAP and
 * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
 *
 * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
 * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
 * (pp 73-140). Lawrence Erlbaum Associates, Inc.
 */
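// For reference (standard quadrature EAP, not code in this file): with prior
// weights w_q at ordinates theta_q and pattern likelihood L(x|theta_q), the
// EAP score is roughly
//   EAP(x) = sum_q theta_q * w_q * L(x|theta_q) / sum_q w_q * L(x|theta_q).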
static void
ba81PopulateAttributes(omxExpectation *oo, SEXP robj)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        if (!state->debugInternal) return;

        ba81NormalQuad &quad = state->getQuad();
        int maxAbilities = quad.maxAbilities;
        const int numUnique = state->getNumUnique();

        const double LogLargest = state->LogLargestDouble;
        int totalOutcomes = state->totalOutcomes();
        SEXP Rlik;
        SEXP Rexpected;

        if (state->grp.patternLik.size() != numUnique) {
                refreshPatternLikelihood(state, oo->dynamicDataSource);
        }

        Rf_protect(Rlik = Rf_allocVector(REALSXP, numUnique));
        memcpy(REAL(Rlik), state->grp.patternLik.data(), sizeof(double) * numUnique);
        double *lik_out = REAL(Rlik);
        for (int px=0; px < numUnique; ++px) {
                // Must return value in log units because it may not be representable otherwise
                lik_out[px] = log(lik_out[px]) - LogLargest;
        }

        MxRList dbg;
        dbg.add("patternLikelihood", Rlik);

        if (state->expected) {
                Rf_protect(Rexpected = Rf_allocVector(REALSXP, quad.totalQuadPoints * totalOutcomes));
                memcpy(REAL(Rexpected), state->expected, sizeof(double) * totalOutcomes * quad.totalQuadPoints);
                dbg.add("em.expected", Rexpected);
        }

        SEXP Rmean, Rcov;
        if (state->estLatentMean) {
                Rf_protect(Rmean = Rf_allocVector(REALSXP, maxAbilities));
                memcpy(REAL(Rmean), state->estLatentMean->data, maxAbilities * sizeof(double));
                dbg.add("mean", Rmean);
        }
        if (state->estLatentCov) {
                Rf_protect(Rcov = Rf_allocMatrix(REALSXP, maxAbilities, maxAbilities));
                memcpy(REAL(Rcov), state->estLatentCov->data, maxAbilities * maxAbilities * sizeof(double));
                dbg.add("cov", Rcov);
        }

        Rf_setAttrib(robj, Rf_install("debug"), dbg.asR());
}

static void ba81Destroy(omxExpectation *oo) {
        if(OMX_DEBUG) {
                mxLog("Freeing %s function.", oo->name);
        }
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        omxFreeMatrix(state->estLatentMean);
        omxFreeMatrix(state->estLatentCov);
        Free(state->expected);
        delete state;
}

static void ignoreSetVarGroup(omxExpectation*, FreeVarGroup *)
{}

static omxMatrix *getComponent(omxExpectation *oo, omxFitFunction*, const char *what)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;

        if (strcmp(what, "covariance")==0) {
                return state->estLatentCov;
        } else if (strcmp(what, "mean")==0) {
                return state->estLatentMean;
        } else {
                return NULL;
        }
}

void getMatrixDims(SEXP r_theta, int *rows, int *cols)
{
    SEXP matrixDims;
    ScopedProtect p1(matrixDims, Rf_getAttrib(r_theta, R_DimSymbol));
    int *dimList = INTEGER(matrixDims);
    *rows = dimList[0];
    *cols = dimList[1];
}

void omxInitExpectationBA81(omxExpectation* oo) {
        omxState* currentState = oo->currentState;
        SEXP rObj = oo->rObj;
        SEXP tmp;

        if(OMX_DEBUG) {
                mxLog("Initializing %s.", oo->name);
        }
        if (!rpf_model) {
                if (0) {
                        const int wantVersion = 3;
                        int version;
                        get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
                        (*get_librpf)(&version, &rpf_numModels, &rpf_model);
                        if (version < wantVersion) Rf_error("librpf binary API %d installed, at least %d is required",
                                                         version, wantVersion);
                } else {
                        rpf_numModels = librpf_numModels;
                        rpf_model = librpf_model;
                }
        }

        BA81Expect *state = new BA81Expect;

        // These two constants should be as identical as possible
        state->name = oo->name;
        state->LogLargestDouble = log(std::numeric_limits<double>::max()) - 1;
        state->LargestDouble = exp(state->LogLargestDouble);
        ba81NormalQuad &quad = state->getQuad();
        quad.setOne(state->LargestDouble);

        state->expectedUsed = false;

        state->estLatentMean = NULL;
        state->estLatentCov = NULL;
        state->expected = NULL;
        state->type = EXPECTATION_OBSERVED;
        state->itemParam = NULL;
        state->EitemParam = NULL;
        state->itemParamVersion = 0;
        state->latentParamVersion = 0;
        oo->argStruct = (void*) state;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("data")));
        state->data = omxDataLookupFromState(tmp, currentState);
        }

        if (strcmp(omxDataType(state->data), "raw") != 0) {
                omxRaiseErrorf("%s unable to handle data type %s", oo->name, omxDataType(state->data));
                return;
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("verbose")));
        state->verbose = Rf_asInteger(tmp);
        }

        int targetQpoints;
        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qpoints")));
                targetQpoints = Rf_asInteger(tmp);
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qwidth")));
        state->grp.setGridFineness(Rf_asReal(tmp), targetQpoints);
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("ItemSpec")));
        state->grp.importSpec(tmp);
        if (state->verbose >= 2) mxLog("%s: found %d item specs", oo->name, state->numItems());
        }

        state->_latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean");
        state->_latentCovOut  = omxNewMatrixFromSlot(rObj, currentState, "cov");

        state->itemParam = omxNewMatrixFromSlot(rObj, currentState, "item");
        state->grp.param = state->itemParam->data; // algebra not allowed yet TODO

        const int numItems = state->itemParam->cols;
        if (state->numItems() != numItems) {
                omxRaiseErrorf("ItemSpec length %d must match the number of item columns (%d)",
                               state->numItems(), numItems);
                return;
        }
        if (state->itemParam->rows != state->grp.impliedParamRows) {
                omxRaiseErrorf("item matrix must have %d rows", state->grp.impliedParamRows);
                return;
        }
        state->grp.paramRows = state->itemParam->rows;

        // for algebra item param, will need to defer until later?
        state->grp.learnMaxAbilities();

        int maxAbilities = state->grp.maxAbilities;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("EstepItem")));
        if (!Rf_isNull(tmp)) {
                int rows, cols;
                getMatrixDims(tmp, &rows, &cols);
                if (rows != state->itemParam->rows || cols != state->itemParam->cols) {
                        Rf_error("EstepItem must have the same dimensions as the item MxMatrix");
                }
                state->EitemParam = REAL(tmp);
        }
        }

        oo->computeFun = ba81compute;
        oo->setVarGroup = ignoreSetVarGroup;
        oo->destructFun = ba81Destroy;
        oo->populateAttrFun = ba81PopulateAttributes;
        oo->componentFun = getComponent;
        oo->canDuplicate = false;

        // TODO: Exactly identical rows do not contribute any information.
        // The sorting algorithm ought to remove them so we get better cache behavior.
        // The following summary stats would be cheaper to calculate too.

        omxData *data = state->data;
        std::vector<int> &rowMap = state->grp.rowMap;

        int weightCol;
        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("weightColumn")));
                weightCol = INTEGER(tmp)[0];
        }

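        // Two weighting modes: with no weightColumn, consecutive identical rows
        // are collapsed into unique patterns with integer frequencies (rowMap
        // points at the first row of each run); with a weightColumn, every row
        // is kept and its weight is read from that column.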
        if (weightCol == NA_INTEGER) {
                // Should rowMap be part of omxData? This is essentially a
                // generic compression step that shouldn't be specific to IFA models.
                state->grp.rowWeight = (double*) R_alloc(data->rows, sizeof(double));
                rowMap.resize(data->rows);
                int numUnique = 0;
                for (int rx=0; rx < data->rows; ) {
                        int rw = omxDataNumIdenticalRows(state->data, rx);
                        state->grp.rowWeight[numUnique] = rw;
                        rowMap[numUnique] = rx;
                        rx += rw;
                        ++numUnique;
                }
                rowMap.resize(numUnique);
                state->weightSum = state->data->rows;
        }
        else {
                if (omxDataColumnIsFactor(data, weightCol)) {
                        omxRaiseErrorf("%s: weightColumn %d is a factor", oo->name, 1 + weightCol);
                        return;
                }
                state->grp.rowWeight = omxDoubleDataColumn(data, weightCol);
                state->weightSum = 0;
                for (int rx=0; rx < data->rows; ++rx) { state->weightSum += state->grp.rowWeight[rx]; }
                rowMap.resize(data->rows);
                for (size_t rx=0; rx < rowMap.size(); ++rx) {
                        rowMap[rx] = rx;
                }
        }
        // complain about non-integral rowWeights (EAP can't work) TODO

        const double *colMap; // should be integer TODO
        {
        ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("dataColumns")));
        if (Rf_length(tmp) != numItems) Rf_error("dataColumns must be length %d", numItems);
        colMap = REAL(tmp);
        }

        for (int cx = 0; cx < numItems; cx++) {
                int *col = omxIntDataColumnUnsafe(data, colMap[cx]);
                state->grp.dataColumns.push_back(col);
        }

        // sanity check data
        for (int cx = 0; cx < numItems; cx++) {
                if (!omxDataColumnIsFactor(data, colMap[cx])) {
                        omxPrintData(data, "diagnostic", 3);
                        omxRaiseErrorf("%s: column %d is not a factor", oo->name, int(1 + colMap[cx]));
                        return;
                }
        }

        // TODO the max outcome should be available from omxData
        for (int rx=0; rx < data->rows; rx++) {
                int cols = 0;
                for (int cx = 0; cx < numItems; cx++) {
                        const int *col = state->grp.dataColumns[cx];
                        int pick = col[rx];
                        if (pick == NA_INTEGER) continue;
                        ++cols;
                        const int no = state->grp.itemOutcomes[cx];
                        if (pick > no) {
                                Rf_error("Data for item '%s' has at least %d outcomes, not %d",
                                         state->itemParam->colnames[cx], pick, no);
                        }
                }
                if (cols == 0) {
                        Rf_error("Row %d has all NAs", 1+rx);
                }
        }

        if (state->_latentMeanOut && state->_latentMeanOut->rows * state->_latentMeanOut->cols != maxAbilities) {
                Rf_error("The mean matrix '%s' must be a row or column vector of size %d",
                      state->_latentMeanOut->name, maxAbilities);
        }

        if (state->_latentCovOut && (state->_latentCovOut->rows != maxAbilities ||
                                    state->_latentCovOut->cols != maxAbilities)) {
                Rf_error("The cov matrix '%s' must be %dx%d",
                      state->_latentCovOut->name, maxAbilities, maxAbilities);
        }

        state->grp.setLatentDistribution(maxAbilities,
                                         state->_latentMeanOut? state->_latentMeanOut->data : NULL,
                                         state->_latentCovOut? state->_latentCovOut->data : NULL);
        state->grp.detectTwoTier();

        if (state->verbose >= 1 && state->grp.numSpecific) {
                mxLog("%s: Two-tier structure detected; "
                      "%d abilities reduced to %d dimensions",
                      oo->name, maxAbilities, maxAbilities - state->grp.numSpecific + 1);
        }

        // TODO: Items with zero loadings can be replaced with equivalent items
        // with fewer factors. This would speed up calculation of derivatives.

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("minItemsPerScore")));
        state->grp.setMinItemsPerScore(Rf_asInteger(tmp));
        }

        state->grp.sanityCheck();

        state->grp.buildRowSkip();

        if (isErrorRaised()) return;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("debugInternal")));
        state->debugInternal = Rf_asLogical(tmp);
        }

        state->ElatentVersion = 0;
        if (state->_latentMeanOut) {
                state->estLatentMean = omxInitMatrix(maxAbilities, 1, TRUE, currentState);
                omxCopyMatrix(state->estLatentMean, state->_latentMeanOut); // rename matrices TODO
        }
        if (state->_latentCovOut) {
                state->estLatentCov = omxInitMatrix(maxAbilities, maxAbilities, TRUE, currentState);
                omxCopyMatrix(state->estLatentCov, state->_latentCovOut);
        }
}

const char *BA81Expect::getLatentIncompatible(BA81Expect *other)
{
        // NOTE: grp.quad not initialized yet
        // make method of ifaGroup ?
        if (grp.itemOutcomes != other->grp.itemOutcomes) return "items";
        if (grp.maxAbilities != other->grp.maxAbilities) return "number of factors";
        if (grp.numSpecific != other->grp.numSpecific) return "number of specific factors";
        if (grp.qpoints != other->grp.qpoints) return "qpoints";
        if (grp.qwidth != other->grp.qwidth) return "qwidth";
        return 0;
}