Handle model.mat[R,C] labels in BA81 latent distribution
[openmx:openmx.git] / src / omxExpectationBA81.cpp
/*
  Copyright 2012-2014 Joshua Nathaniel Pritikin and contributors

  This is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <limits>
#include <Rmath.h>

#include "omxExpectationBA81.h"
#include "glue.h"
#include "libifa-rpf.h"
#include "dmvnorm.h"
#include "omxBuffer.h"
#include "matrix.h"

const struct rpf *rpf_model = NULL;
int rpf_numModels;

void pda(const double *ar, int rows, int cols)
{
        if (rows == 0 || cols == 0) return;
        std::string buf;
        for (int rx=0; rx < rows; rx++) {   // column major order
                for (int cx=0; cx < cols; cx++) {
                        buf += string_snprintf("%.6g, ", ar[cx * rows + rx]);
                }
                buf += "\n";
        }
        mxLogBig(buf);
}

void pia(const int *ar, int rows, int cols)
{
        if (rows == 0 || cols == 0) return;
        std::string buf;
        for (int rx=0; rx < rows; rx++) {   // column major order
                for (int cx=0; cx < cols; cx++) {
                        buf += string_snprintf("%d, ", ar[cx * rows + rx]);
                }
                buf += "\n";
        }
        mxLogBig(buf);
}

template <typename T>
void BA81LatentFixed<T>::normalizeWeights(class ifaGroup *grp, T extraData,
                                          int px, double *Qweight, double patternLik1, int thrId)
{
        double weight = grp->rowWeight[px] / patternLik1;
        int pts = grp->quad.weightTableSize;
        for (int qx=0; qx < pts; ++qx) {
                Qweight[qx] *= weight;
        }
}
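
// With a fixed latent distribution, normalizeWeights only rescales the row's
// per-quadrature-point likelihoods in Qweight: dividing by the row's marginal
// likelihood (patternLik1) and multiplying by rowWeight[px] turns them into
// posterior mass that (assuming Qweight sums to patternLik1 over the weight
// table) adds up to the row's frequency weight.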

template <typename T>
void BA81LatentSummary<T>::begin(class ifaGroup *state, T extraData)
{
        thrDweight.assign(state->quad.weightTableSize * Global->numThreads, 0.0);
        ba81NormalQuad &quad = state->quad;
        numLatents = quad.maxAbilities + triangleLoc1(quad.maxAbilities);
        latentDist.assign(numLatents, 0.0);
}

template <typename T>
void BA81LatentSummary<T>::normalizeWeights(class ifaGroup *grp, T extraData,
                                            int px, double *Qweight, double patternLik1, int thrId)
{
        double weight = grp->rowWeight[px] / patternLik1;
        int pts = grp->quad.weightTableSize;
        double *Dweight = thrDweight.data() + pts * thrId;
        for (int qx=0; qx < pts; ++qx) {
                double tmp = Qweight[qx] * weight;
                Dweight[qx] += tmp;
                Qweight[qx] = tmp;
        }
}
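
// The summary variant does the same rescaling but also accumulates the result
// into a per-thread slice of thrDweight (offset by pts * thrId), so threads
// never write to shared memory here; the slices are reduced in end() below.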

template <typename T>
void BA81LatentSummary<T>::end(class ifaGroup *grp, T extraData)
{
        int pts = grp->quad.weightTableSize;

        for (int tx=1; tx < Global->numThreads; ++tx) {
                double *Dweight = thrDweight.data() + pts * tx;
                double *dest = thrDweight.data();
                for (int qx=0; qx < pts; ++qx) {
                        dest[qx] += Dweight[qx];
                }
        }

        ba81NormalQuad &quad = grp->quad;
        quad.EAP(thrDweight.data(), 1/extraData->weightSum, latentDist.data());

        omxMatrix *meanOut = extraData->estLatentMean;
        omxMatrix *covOut = extraData->estLatentCov;
        const int maxAbilities = quad.maxAbilities;
        const int primaryDims = quad.primaryDims;

        double *latentDist1 = latentDist.data();
        if (meanOut) {
                for (int d1=0; d1 < maxAbilities; d1++) {
                        omxSetVectorElement(meanOut, d1, latentDist1[d1]);
                }
        }

        if (covOut) {
                for (int d1=0; d1 < primaryDims; d1++) {
                        int cx = maxAbilities + triangleLoc1(d1);
                        for (int d2=0; d2 <= d1; d2++) {
                                double cov = latentDist1[cx];
                                omxSetMatrixElement(covOut, d1, d2, cov);
                                if (d1 != d2) omxSetMatrixElement(covOut, d2, d1, cov);
                                ++cx;
                        }
                }
                for (int d1=primaryDims; d1 < maxAbilities; d1++) {
                        int loc = maxAbilities + triangleLoc0(d1);
                        omxSetMatrixElement(covOut, d1, d1, latentDist1[loc]);
                }
        }

        ++extraData->ElatentVersion;
}
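
// As used above, latentDist holds the maxAbilities mean estimates followed by
// the packed lower triangle of the covariance (indexed through triangleLoc1
// for the primary block and triangleLoc0 for the specific-factor diagonal).
// The loops unpack that triangle into the square estLatentCov matrix and
// mirror the off-diagonal entries.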

template <typename T, typename CovType>
void BA81Estep<T, CovType>::begin(ifaGroup *state, T extraData)
{
        ba81NormalQuad &quad = state->quad;
        thrExpected.assign(state->totalOutcomes * quad.totalQuadPoints * Global->numThreads, 0.0);
}

template <typename T, typename CovType>
void BA81Estep<T, CovType>::addRow(class ifaGroup *state, T extraData, int px, double *Qweight, int thrId)
{
        double *out = thrExpected.data() + thrId * state->totalOutcomes * state->quad.totalQuadPoints;
        BA81EstepBase<CovType>::addRow1(state, px, Qweight, out);
}

template<>
void BA81EstepBase<BA81Dense>::addRow1(class ifaGroup *grp, int px, double *Qweight, double *out)
{
        std::vector<int> &rowMap = grp->rowMap;
        std::vector<int> &itemOutcomes = grp->itemOutcomes;
        ba81NormalQuad &quad = grp->getQuad();
        const int totalQuadPoints = quad.totalQuadPoints;

        for (int ix=0; ix < grp->numItems(); ++ix) {
                int pick = grp->dataColumns[ix][rowMap[px]];
                if (pick == NA_INTEGER) {
                        out += itemOutcomes[ix] * totalQuadPoints;
                        continue;
                }
                pick -= 1;

                for (int qx=0; qx < totalQuadPoints; ++qx) {
                        out[pick] += Qweight[qx];
                        out += itemOutcomes[ix];
                }
        }
}
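
// Dense case: for each item, `out` walks the expected table in
// [quadrature point][outcome] order, adding the row's posterior weight
// Qweight[qx] to the picked outcome's cell; a missing response (NA_INTEGER)
// simply skips the item's itemOutcomes[ix] * totalQuadPoints block.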

template<>
void BA81EstepBase<BA81TwoTier>::addRow1(class ifaGroup *grp, int px, double *Qweight, double *out)
{
        std::vector<int> &rowMap = grp->rowMap;
        std::vector<int> &itemOutcomes = grp->itemOutcomes;
        ba81NormalQuad &quad = grp->getQuad();
        const int numSpecific = quad.numSpecific;
        const int totalQuadPoints = quad.totalQuadPoints;

        for (int ix=0; ix < grp->numItems(); ++ix) {
                int pick = grp->dataColumns[ix][rowMap[px]];
                if (pick == NA_INTEGER) {
                        out += itemOutcomes[ix] * totalQuadPoints;
                        continue;
                }
                pick -= 1;

                int Sgroup = grp->Sgroup[ix];
                double *Qw = Qweight;
                for (int qx=0; qx < totalQuadPoints; ++qx) {
                        out[pick] += Qw[Sgroup];
                        out += itemOutcomes[ix];
                        Qw += numSpecific;
                }
        }
}
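
// Two-tier case: identical bookkeeping, except Qweight holds numSpecific
// weights per quadrature point and each item reads the entry for its own
// specific group (grp->Sgroup[ix]).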

template <typename T, typename CovType>
void BA81Estep<T, CovType>::recordTable(class ifaGroup *state, T extraData)
{
        const int numThreads = Global->numThreads;
        ba81NormalQuad &quad = state->getQuad();
        const int expectedSize = quad.totalQuadPoints * state->totalOutcomes;
        double *e1 = thrExpected.data();

        extraData->expected = Realloc(extraData->expected, state->totalOutcomes * quad.totalQuadPoints, double);
        memcpy(extraData->expected, e1, sizeof(double) * expectedSize);
        e1 += expectedSize;

        for (int tx=1; tx < numThreads; ++tx) {
                for (int ex=0; ex < expectedSize; ++ex) {
                        extraData->expected[ex] += *e1;
                        ++e1;
                }
        }
}
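
// recordTable reduces the per-thread expected tables: thread 0's block is
// copied into extraData->expected and the remaining numThreads-1 blocks are
// added to it element by element.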

static int getLatentVersion(BA81Expect *state)
{
        int vv = 1;  // to ensure it doesn't match on the first test
        if (state->_latentMeanOut) vv += omxGetMatrixVersion(state->_latentMeanOut);
        if (state->_latentCovOut) vv += omxGetMatrixVersion(state->_latentCovOut);
        return vv;
}
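
// The "latent version" is the sum of the mean and cov matrix versions plus 1,
// so it can never equal the initial latentParamVersion of 0. Comparing it
// against state->latentParamVersion tells us whether the quadrature grid is
// stale and must be rebuilt in ba81SetupQuadrature.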

// Attempt G-H grid? http://dbarajassolano.wordpress.com/2012/01/26/on-sparse-grid-quadratures/
void ba81SetupQuadrature(omxExpectation* oo)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        ba81NormalQuad &quad = state->getQuad();
        bool latentClean = state->latentParamVersion == getLatentVersion(state);
        if (quad.Qpoint.size() == 0 && latentClean) return;

        int maxAbilities = state->grp.maxAbilities;
        if (maxAbilities == 0) {
                quad.setup0();
                state->latentParamVersion = getLatentVersion(state);
                return;
        }

        Eigen::VectorXd mean;
        Eigen::MatrixXd fullCov;
        state->getLatentDistribution(NULL, mean, fullCov);

        if (state->verbose >= 1) {
                mxLog("%s: quadrature(%d)", oo->name, getLatentVersion(state));
                if (state->verbose >= 2) {
                        pda(mean.data(), 1, maxAbilities);
                        pda(fullCov.data(), maxAbilities, maxAbilities);
                }
        }

        int numSpecific = state->grp.numSpecific;
        int priDims = maxAbilities - state->grp.numSpecific;
        Eigen::MatrixXd cov = fullCov.topLeftCorner(priDims, priDims);
        Eigen::VectorXd sVar(numSpecific);

        // This is required because the EM acceleration can push the
        // covariance matrix to be slightly non-pd when predictors
        // are highly correlated.
        if (priDims == 1) {
                if (cov(0,0) < BA81_MIN_VARIANCE) cov(0,0) = BA81_MIN_VARIANCE;
        } else {
                Matrix mat(cov.data(), priDims, priDims);
                InplaceForcePosSemiDef(mat, NULL, NULL);
        }

        for (int sx=0; sx < numSpecific; ++sx) {
                int loc = priDims + sx;
                double tmp = fullCov(loc, loc);
                if (tmp < BA81_MIN_VARIANCE) tmp = BA81_MIN_VARIANCE;
                sVar(sx) = tmp;
        }

        quad.setup(state->grp.qwidth, state->grp.qpoints, mean.data(), cov, sVar);

        state->latentParamVersion = getLatentVersion(state);
}
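
// In short: fetch the latent mean/cov, clamp the primary covariance block to
// BA81_MIN_VARIANCE (or force it positive semi-definite when priDims > 1),
// clamp the specific-factor variances the same way, and rebuild the grid via
// quad.setup() using the requested qwidth and qpoints.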

void refreshPatternLikelihood(BA81Expect *state, bool hasFreeLatent)
{
        ba81NormalQuad &quad = state->getQuad();

        if (hasFreeLatent) {
                if (quad.numSpecific == 0) {
                        BA81Engine<typeof(state), BA81Dense, BA81LatentSummary, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                } else {
                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentSummary, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                }
        } else {
                if (quad.numSpecific == 0) {
                        BA81Engine<typeof(state), BA81Dense, BA81LatentFixed, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                } else {
                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentFixed, BA81OmitEstep> engine;
                        engine.ba81Estep1(&state->grp, state);
                }
        }
}
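
// BA81Engine is parameterized by the covariance structure (BA81Dense or
// BA81TwoTier), a latent policy (BA81LatentSummary when the latent
// distribution is free, BA81LatentFixed otherwise), and an E-step policy.
// BA81OmitEstep is chosen here because, as the function name suggests, only
// the pattern likelihoods need refreshing, not the expected table.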

static void
ba81compute(omxExpectation *oo, const char *what, const char *how)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;

        if (what) {
                if (strcmp(what, "latentDistribution")==0 && how && strcmp(how, "copy")==0) {
                        omxCopyMatrix(state->_latentMeanOut, state->estLatentMean);
                        omxCopyMatrix(state->_latentCovOut, state->estLatentCov);
                        return;
                }

                if (strcmp(what, "scores")==0) {
                        state->type = EXPECTATION_AUGMENTED;
                } else if (strcmp(what, "nothing")==0) {
                        state->type = EXPECTATION_OBSERVED;
                } else {
                        omxRaiseErrorf("%s: don't know how to predict '%s'",
                                       oo->name, what);
                }

                if (state->verbose >= 1) {
                        mxLog("%s: predict %s", oo->name, what);
                }
                return;
        }

        bool latentClean = state->latentParamVersion == getLatentVersion(state);
        bool itemClean = state->itemParamVersion == omxGetMatrixVersion(state->itemParam) && latentClean;

        ba81NormalQuad &quad = state->getQuad();

        if (state->verbose >= 1) {
                mxLog("%s: Qinit %d itemClean %d latentClean %d (1=clean) expectedUsed=%d",
                      oo->name, quad.Qpoint.size() != 0, itemClean, latentClean, state->expectedUsed);
        }

        if (!latentClean) ba81SetupQuadrature(oo);

        if (!itemClean) {
                double *param = state->EitemParam? state->EitemParam : state->itemParam->data;
                state->grp.ba81OutcomeProb(param, FALSE);

                bool estep = state->expectedUsed;
                if (state->expectedUsed) {
                        if (quad.numSpecific == 0) {
                                if (oo->dynamicDataSource) {
                                        BA81Engine<typeof(state), BA81Dense, BA81LatentSummary, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                } else {
                                        BA81Engine<typeof(state), BA81Dense, BA81LatentFixed, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                }
                        } else {
                                if (oo->dynamicDataSource) {
                                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentSummary, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                } else {
                                        BA81Engine<typeof(state), BA81TwoTier, BA81LatentFixed, BA81Estep> engine;
                                        engine.ba81Estep1(&state->grp, state);
                                }
                        }
                        state->expectedUsed = false;
                } else {
                        Free(state->expected);
                        refreshPatternLikelihood(state, oo->dynamicDataSource);
                }
                if (oo->dynamicDataSource && state->verbose >= 2) {
                        omxPrint(state->estLatentMean, "mean");
                        omxPrint(state->estLatentCov, "cov");
                }
                if (state->verbose >= 1) {
                        const int numUnique = state->getNumUnique();
                        mxLog("%s: estep(item version %d)<%s, %s, %s> %d/%d rows excluded",
                              state->name, omxGetMatrixVersion(state->itemParam),
                              (quad.numSpecific == 0? "dense":"twotier"),
                              (estep && oo->dynamicDataSource? "summary":"fixed"),
                              (estep? "estep":"omitEstep"),
                              state->grp.excludedPatterns, numUnique);
                }
        }

        state->itemParamVersion = omxGetMatrixVersion(state->itemParam);
}

/**
 * MAP is not affected by the number of items. EAP is. Likelihood can
 * get concentrated in a single quadrature ordinate. For 3PL, response
 * patterns can have a bimodal likelihood. This will confuse MAP and
 * is a key advantage of EAP (Thissen & Orlando, 2001, p. 136).
 *
 * Thissen, D. & Orlando, M. (2001). IRT for items scored in two
 * categories. In D. Thissen & H. Wainer (Eds.), \emph{Test scoring}
 * (pp 73-140). Lawrence Erlbaum Associates, Inc.
 */
static void
ba81PopulateAttributes(omxExpectation *oo, SEXP robj)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        if (!state->debugInternal) return;

        ba81NormalQuad &quad = state->getQuad();
        int maxAbilities = quad.maxAbilities;
        const int numUnique = state->getNumUnique();

        const double LogLargest = state->LogLargestDouble;
        int totalOutcomes = state->totalOutcomes();
        SEXP Rlik;
        SEXP Rexpected;

        if (state->grp.patternLik.size() != numUnique) {
                refreshPatternLikelihood(state, oo->dynamicDataSource);
        }

        Rf_protect(Rlik = Rf_allocVector(REALSXP, numUnique));
        memcpy(REAL(Rlik), state->grp.patternLik.data(), sizeof(double) * numUnique);
        double *lik_out = REAL(Rlik);
        for (int px=0; px < numUnique; ++px) {
                // Must return value in log units because it may not be representable otherwise
                lik_out[px] = log(lik_out[px]) - LogLargest;
        }

        MxRList dbg;
        dbg.add("patternLikelihood", Rlik);

        if (state->expected) {
                Rf_protect(Rexpected = Rf_allocVector(REALSXP, quad.totalQuadPoints * totalOutcomes));
                memcpy(REAL(Rexpected), state->expected, sizeof(double) * totalOutcomes * quad.totalQuadPoints);
                dbg.add("em.expected", Rexpected);
        }

        SEXP Rmean, Rcov;
        if (state->estLatentMean) {
                Rf_protect(Rmean = Rf_allocVector(REALSXP, maxAbilities));
                memcpy(REAL(Rmean), state->estLatentMean->data, maxAbilities * sizeof(double));
                dbg.add("mean", Rmean);
        }
        if (state->estLatentCov) {
                Rf_protect(Rcov = Rf_allocMatrix(REALSXP, maxAbilities, maxAbilities));
                memcpy(REAL(Rcov), state->estLatentCov->data, maxAbilities * maxAbilities * sizeof(double));
                dbg.add("cov", Rcov);
        }

        Rf_setAttrib(robj, Rf_install("debug"), dbg.asR());
}

static void ba81Destroy(omxExpectation *oo) {
        if(OMX_DEBUG) {
                mxLog("Freeing %s function.", oo->name);
        }
        BA81Expect *state = (BA81Expect *) oo->argStruct;
        omxFreeMatrix(state->estLatentMean);
        omxFreeMatrix(state->estLatentCov);
        omxFreeMatrix(state->numObsMat);
        Free(state->expected);
        delete state;
}

static void ignoreSetVarGroup(omxExpectation*, FreeVarGroup *)
{}

static omxMatrix *getComponent(omxExpectation *oo, omxFitFunction*, const char *what)
{
        BA81Expect *state = (BA81Expect *) oo->argStruct;

        if (strcmp(what, "covariance")==0) {
                return state->estLatentCov;
        } else if (strcmp(what, "mean")==0) {
                return state->estLatentMean;
        } else if (strcmp(what, "numObs")==0) {
                return state->numObsMat;
        } else {
                return NULL;
        }
}

void getMatrixDims(SEXP r_theta, int *rows, int *cols)
{
    SEXP matrixDims;
    ScopedProtect p1(matrixDims, Rf_getAttrib(r_theta, R_DimSymbol));
    int *dimList = INTEGER(matrixDims);
    *rows = dimList[0];
    *cols = dimList[1];
}
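
// Usage sketch, as in the EstepItem check in omxInitExpectationBA81 below:
//   int rows, cols;
//   getMatrixDims(tmp, &rows, &cols);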

void omxInitExpectationBA81(omxExpectation* oo) {
        omxState* currentState = oo->currentState;
        SEXP rObj = oo->rObj;
        SEXP tmp;

        if(OMX_DEBUG) {
                mxLog("Initializing %s.", oo->name);
        }
        if (!rpf_model) {
                if (0) {
                        const int wantVersion = 3;
                        int version;
                        get_librpf_t get_librpf = (get_librpf_t) R_GetCCallable("rpf", "get_librpf_model_GPL");
                        (*get_librpf)(&version, &rpf_numModels, &rpf_model);
                        if (version < wantVersion) Rf_error("librpf binary API %d installed, at least %d is required",
                                                         version, wantVersion);
                } else {
                        rpf_numModels = librpf_numModels;
                        rpf_model = librpf_model;
                }
        }

        BA81Expect *state = new BA81Expect;

        // These two constants should be as identical as possible
        state->name = oo->name;
        state->LogLargestDouble = log(std::numeric_limits<double>::max()) - 1;
        state->LargestDouble = exp(state->LogLargestDouble);
        ba81NormalQuad &quad = state->getQuad();
        quad.setOne(state->LargestDouble);

        state->expectedUsed = true;

        state->numObsMat = NULL;
        state->estLatentMean = NULL;
        state->estLatentCov = NULL;
        state->expected = NULL;
        state->type = EXPECTATION_OBSERVED;
        state->itemParam = NULL;
        state->EitemParam = NULL;
        state->itemParamVersion = 0;
        state->latentParamVersion = 0;
        oo->argStruct = (void*) state;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("data")));
        state->data = omxDataLookupFromState(tmp, currentState);
        }

        if (strcmp(omxDataType(state->data), "raw") != 0) {
                omxRaiseErrorf("%s unable to handle data type %s", oo->name, omxDataType(state->data));
                return;
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("verbose")));
        state->verbose = Rf_asInteger(tmp);
        }

        int targetQpoints;
        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qpoints")));
                targetQpoints = Rf_asInteger(tmp);
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("qwidth")));
        state->grp.setGridFineness(Rf_asReal(tmp), targetQpoints);
        }

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("ItemSpec")));
        state->grp.importSpec(tmp);
        if (state->verbose >= 2) mxLog("%s: found %d item specs", oo->name, state->numItems());
        }

        state->_latentMeanOut = omxNewMatrixFromSlot(rObj, currentState, "mean");
        state->_latentCovOut  = omxNewMatrixFromSlot(rObj, currentState, "cov");

        state->itemParam = omxNewMatrixFromSlot(rObj, globalState, "item");
        state->grp.param = state->itemParam->data; // algebra not allowed yet TODO

        const int numItems = state->itemParam->cols;
        if (state->numItems() != numItems) {
                omxRaiseErrorf("ItemSpec length %d must match the number of item columns (%d)",
                               state->numItems(), numItems);
                return;
        }
        if (state->itemParam->rows != state->grp.paramRows) {
                omxRaiseErrorf("item matrix must have %d rows", state->grp.paramRows);
                return;
        }

        // for algebra item param, will need to defer until later?
        state->grp.learnMaxAbilities();

        int maxAbilities = state->grp.maxAbilities;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("EstepItem")));
        if (!Rf_isNull(tmp)) {
                int rows, cols;
                getMatrixDims(tmp, &rows, &cols);
                if (rows != state->itemParam->rows || cols != state->itemParam->cols) {
                        Rf_error("EstepItem must have the same dimensions as the item MxMatrix");
                }
                state->EitemParam = REAL(tmp);
        }
        }

        oo->computeFun = ba81compute;
        oo->setVarGroup = ignoreSetVarGroup;
        oo->destructFun = ba81Destroy;
        oo->populateAttrFun = ba81PopulateAttributes;
        oo->componentFun = getComponent;
        oo->canDuplicate = false;

        // TODO: Exactly identical rows do not contribute any information.
        // The sorting algorithm ought to remove them so we get better cache behavior.
        // The following summary stats would be cheaper to calculate too.

        omxData *data = state->data;
        std::vector<int> &rowMap = state->grp.rowMap;

        int weightCol;
        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("weightColumn")));
                weightCol = INTEGER(tmp)[0];
        }

        if (weightCol == NA_INTEGER) {
                // Should rowMap be part of omxData? This is essentially a
                // generic compression step that shouldn't be specific to IFA models.
                state->grp.rowWeight = (double*) R_alloc(data->rows, sizeof(double));
                rowMap.resize(data->rows);
                int numUnique = 0;
                for (int rx=0; rx < data->rows; ) {
                        int rw = omxDataNumIdenticalRows(state->data, rx);
                        state->grp.rowWeight[numUnique] = rw;
                        rowMap[numUnique] = rx;
                        rx += rw;
                        ++numUnique;
                }
                rowMap.resize(numUnique);
                state->weightSum = state->data->rows;
        }
        else {
                if (omxDataColumnIsFactor(data, weightCol)) {
                        omxRaiseErrorf("%s: weightColumn %d is a factor", oo->name, 1 + weightCol);
                        return;
                }
                state->grp.rowWeight = omxDoubleDataColumn(data, weightCol);
                state->weightSum = 0;
                for (int rx=0; rx < data->rows; ++rx) { state->weightSum += state->grp.rowWeight[rx]; }
                rowMap.resize(data->rows);
                for (size_t rx=0; rx < rowMap.size(); ++rx) {
                        rowMap[rx] = rx;
                }
        }
        // complain about non-integral rowWeights (EAP can't work) TODO

        const double *colMap; // should be integer TODO
        {
        ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("dataColumns")));
        if (Rf_length(tmp) != numItems) Rf_error("dataColumns must be length %d", numItems);
        colMap = REAL(tmp);
        }

        for (int cx = 0; cx < numItems; cx++) {
                int *col = omxIntDataColumnUnsafe(data, colMap[cx]);
                state->grp.dataColumns.push_back(col);
        }

        // sanity check data
        for (int cx = 0; cx < numItems; cx++) {
                if (!omxDataColumnIsFactor(data, colMap[cx])) {
                        // colMap is stored as double; cast before passing to the %d format
                        omxRaiseErrorf("%s: column %d is not a factor", oo->name, 1 + (int) colMap[cx]);
                        return;
                }

                const int *col = state->grp.dataColumns[cx];

                // TODO this summary stat should be available from omxData
                int dataMax=0;
                for (int rx=0; rx < data->rows; rx++) {
                        int pick = col[rx];
                        if (dataMax < pick)
                                dataMax = pick;
                }
                int no = state->grp.itemOutcomes[cx];
                if (dataMax > no) {
                        omxRaiseErrorf("Data for item %d has %d outcomes, not %d", cx+1, dataMax, no);
                }
        }

        if (state->_latentMeanOut && state->_latentMeanOut->rows * state->_latentMeanOut->cols != maxAbilities) {
                Rf_error("The mean matrix '%s' must be a row or column vector of size %d",
                      state->_latentMeanOut->name, maxAbilities);
        }

        if (state->_latentCovOut && (state->_latentCovOut->rows != maxAbilities ||
                                    state->_latentCovOut->cols != maxAbilities)) {
                Rf_error("The cov matrix '%s' must be %dx%d",
                      state->_latentCovOut->name, maxAbilities, maxAbilities);
        }

        state->grp.setLatentDistribution(maxAbilities,
                                         state->_latentMeanOut? state->_latentMeanOut->data : NULL,
                                         state->_latentCovOut? state->_latentCovOut->data : NULL);
        state->grp.detectTwoTier();

        if (state->verbose >= 1 && state->grp.numSpecific) {
                mxLog("%s: Two-tier structure detected; "
                      "%d abilities reduced to %d dimensions",
                      oo->name, maxAbilities, maxAbilities - state->grp.numSpecific + 1);
        }

        // TODO: Items with zero loadings can be replaced with equivalent items
        // with fewer factors. This would speed up calculation of derivatives.

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("minItemsPerScore")));
        state->grp.setMinItemsPerScore(Rf_asInteger(tmp));
        }

        state->grp.sanityCheck();

        state->grp.buildRowSkip();

        if (isErrorRaised()) return;

        {ScopedProtect p1(tmp, R_do_slot(rObj, Rf_install("debugInternal")));
        state->debugInternal = Rf_asLogical(tmp);
        }

        state->ElatentVersion = 0;
        if (state->_latentMeanOut) {
                state->estLatentMean = omxInitMatrix(maxAbilities, 1, TRUE, currentState);
                omxCopyMatrix(state->estLatentMean, state->_latentMeanOut); // rename matrices TODO
        }
        if (state->_latentCovOut) {
                state->estLatentCov = omxInitMatrix(maxAbilities, maxAbilities, TRUE, currentState);
                omxCopyMatrix(state->estLatentCov, state->_latentCovOut);
        }
        state->numObsMat = omxInitMatrix(1, 1, TRUE, currentState);
        omxSetVectorElement(state->numObsMat, 0, data->rows);
}