Precompute quadrature locations
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 struct BA81FitState {
24
25         bool haveLatentMap;
26         std::vector<int> latentMap;
27         bool freeLatents;
28
29         bool haveItemMap;
30         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
31         std::vector<int> paramFlavor;        // freeParam
32         std::vector<int> paramMap;           // itemParam->cols * itemDerivPadSize -> index of free parameter
33         std::vector<int> paramLocations;     // param# -> count of appearances in ItemParam
34         std::vector<int> itemParamFree;      // itemParam->cols * itemParam->rows
35         std::vector<int> ihessDivisor;       // freeParam * freeParam
36
37         std::vector< FreeVarGroup* > varGroups;
38         size_t numItemParam;
39
40         omxMatrix *itemParam;
41         omxMatrix *latentMean;
42         omxMatrix *latentCov;
43
44         BA81FitState();
45         ~BA81FitState();
46         void copyEstimates(BA81Expect *estate);
47 };
48
49 BA81FitState::BA81FitState()
50 {
51         haveItemMap = false;
52         haveLatentMap = false;
53         freeLatents = false;
54 }
55
56 void BA81FitState::copyEstimates(BA81Expect *estate)
57 {
58         omxCopyMatrix(itemParam, estate->itemParam);
59         omxCopyMatrix(latentMean, estate->latentMeanOut);
60         omxCopyMatrix(latentCov, estate->latentCovOut);
61 }
62
63 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
64 {
65         FreeVarGroup *fvg = fc->varGroup;
66         BA81FitState *state = (BA81FitState *) oo->argStruct;
67         std::vector<int> &latentMap = state->latentMap;
68         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
69         int meanNum = estate->latentMeanOut->matrixNumber;
70         int covNum = estate->latentCovOut->matrixNumber;
71         int itemNum = estate->itemParam->matrixNumber;
72         int maxAbilities = estate->maxAbilities;
73         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
74
75         latentMap.assign(numLatents, -1);
76
77         int numParam = int(fvg->vars.size());
78         for (int px=0; px < numParam; px++) {
79                 omxFreeVar *fv = fvg->vars[px];
80                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
81                         omxFreeVarLocation *loc = &fv->locations[lx];
82                         int matNum = ~loc->matrix;
83                         if (matNum == meanNum) {
84                                 latentMap[loc->row + loc->col] = px;
85                                 state->freeLatents = true;
86                         } else if (matNum == covNum) {
87                                 int a1 = loc->row;
88                                 int a2 = loc->col;
89                                 if (a1 < a2) std::swap(a1, a2);
90                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
91                                 if (latentMap[cell] == -1) {
92                                         latentMap[cell] = px;
93
94                                         if (a1 == a2 && fv->lbound == NEG_INF) {
95                                                 fv->lbound = 1e-6;  // variance must be positive
96                                                 if (fc->est[px] < fv->lbound) {
97                                                         error("Starting value for variance %s is negative", fv->name);
98                                                 }
99                                         }
100                                 } else if (latentMap[cell] != px) {
101                                         // doesn't work for multigroup constraints TODO
102                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
103                                               fvg->vars[latentMap[cell]]->name, fv->name);
104                                 }
105                                 state->freeLatents = true;
106                         } else if (matNum == itemNum) {
107                                 omxRaiseErrorf(globalState, "The fitfunction free.set should consist of "
108                                                "latent distribution parameters, excluding item parameters");
109                         }
110                 }
111         }
112         state->haveLatentMap = TRUE;
113 }
114
115 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
116 {
117         FreeVarGroup *fvg = fc->varGroup;
118         BA81FitState *state = (BA81FitState *) oo->argStruct;
119         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
120         omxMatrix *itemParam = estate->itemParam;
121         int size = itemParam->cols * state->itemDerivPadSize;
122         state->paramMap.assign(size, -1);  // matrix location to free param index
123         state->itemParamFree.assign(itemParam->rows * itemParam->cols, FALSE);
124
125         size_t numFreeParams = state->numItemParam = fvg->vars.size();
126         state->paramLocations.assign(numFreeParams, 0);
127         state->paramFlavor.assign(numFreeParams, -1);
128
129         for (size_t px=0; px < numFreeParams; px++) {
130                 omxFreeVar *fv = fvg->vars[px];
131                 state->paramLocations[px] = int(fv->locations.size());
132                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
133                         omxFreeVarLocation *loc = &fv->locations[lx];
134                         int matNum = ~loc->matrix;
135                         // prohibit mean & cov TODO
136                         if (matNum == itemParam->matrixNumber) {
137                                 int at = loc->col * state->itemDerivPadSize + loc->row;
138                                 state->paramMap[at] = px;
139                                 state->itemParamFree[loc->col * itemParam->rows + loc->row] = TRUE;
140
141                                 const double *spec = estate->itemSpec[loc->col];
142                                 int id = spec[RPF_ISpecID];
143                                 int flavor;
144                                 double upper, lower;
145                                 (*rpf_model[id].paramInfo)(spec, loc->row, &flavor, &upper, &lower);
146                                 if (state->paramFlavor[px] < 0) {
147                                         state->paramFlavor[px] = flavor;
148                                 } else if (state->paramFlavor[px] != flavor) {
149                                         error("Cannot equate %s with %s[%d,%d]", fv->name,
150                                               itemParam->name, loc->row, loc->col);
151                                 }
152                                 if (fv->lbound == NEG_INF && isfinite(lower)) {
153                                         fv->lbound = lower;
154                                         if (fc->est[px] < fv->lbound) {
155                                                 error("Starting value %s %f less than lower bound %f",
156                                                       fv->name, fc->est[px], lower);
157                                         }
158                                 }
159                                 if (fv->ubound == INF && isfinite(upper)) {
160                                         fv->ubound = upper;
161                                         if (fc->est[px] > fv->ubound) {
162                                                 error("Starting value %s %f greater than upper bound %f",
163                                                       fv->name, fc->est[px], upper);
164                                         }
165                                 }
166                         }
167                 }
168         }
169
170         state->ihessDivisor.resize(size);
171
172         for (int cx=0; cx < itemParam->cols; ++cx) {
173                 const double *spec = estate->itemSpec[cx];
174                 int id = spec[RPF_ISpecID];
175                 int numParam = (*rpf_model[id].numParam)(spec);
176
177                 for (int p1=0; p1 < numParam; p1++) {
178                         int at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
179                         if (at1 < 0) continue;
180
181                         for (int p2=0; p2 <= p1; p2++) {
182                                 int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
183                                 if (at2 < 0) continue;
184
185                                 if (at1 < at2) std::swap(at1, at2);  // lower triangle
186
187                                 //mxLog("Item %d param(%d,%d) -> H[%d,%d]", cx, p1, p2, at1, at2);
188                                 int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
189                                 state->paramMap[at] = numFreeParams + at1 * numFreeParams + at2;
190
191                                 state->ihessDivisor[at] =
192                                         state->paramLocations[at1] * state->paramLocations[at2];
193                         }
194                 }
195         }
196
197         state->haveItemMap = TRUE;
198         //pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
199 }
200
201 static double
202 ba81ComputeEMFit(omxFitFunction* oo, int want, FitContext *fc)
203 {
204         BA81FitState *state = (BA81FitState*) oo->argStruct;
205         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
206         omxMatrix *customPrior = estate->customPrior;
207         omxMatrix *itemParam = estate->itemParam;
208         std::vector<const double*> &itemSpec = estate->itemSpec;
209         std::vector<int> &cumItemOutcomes = estate->cumItemOutcomes;
210         const int maxDims = estate->maxDims;
211         const size_t numItems = estate->itemSpec.size();
212         const int do_fit = want & FF_COMPUTE_FIT;
213         const int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN);
214
215         if (estate->verbose) mxLog("%s: em.fit(want fit=%d deriv=%d)", oo->matrix->name, do_fit, do_deriv);
216
217         double ll = 0;
218         if (customPrior) {
219                 omxRecompute(customPrior);
220                 ll = customPrior->data[0];
221                 // need deriv adjustment TODO
222         }
223
224         if (!isfinite(ll)) {
225                 omxPrint(itemParam, "item param");
226                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
227         }
228
229         if (do_fit) ba81OutcomeProb(estate, FALSE, TRUE);
230
231         const int thrDerivSize = itemParam->cols * state->itemDerivPadSize;
232         std::vector<double> thrDeriv(thrDerivSize * Global->numThreads);
233         double *wherePrep = estate->wherePrep.data();
234
235 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:ll)
236         for (size_t ix=0; ix < numItems; ix++) {
237                 const int thrId = omx_absolute_thread_num();
238                 const double *spec = estate->itemSpec[ix];
239                 const int id = spec[RPF_ISpecID];
240                 const rpf_dLL1_t dLL1 = rpf_model[id].dLL1;
241                 const int iOutcomes = estate->itemOutcomes[ix];
242                 const int outcomeBase = cumItemOutcomes[ix] * estate->totalQuadPoints;
243                 const double *weight = estate->expected + outcomeBase;
244                 const double *oProb = estate->outcomeProb + outcomeBase;
245                 const double *iparam = omxMatrixColumn(itemParam, ix);
246                 double *myDeriv = thrDeriv.data() + thrDerivSize * thrId + ix * state->itemDerivPadSize;
247
248                 for (long qx=0; qx < estate->totalQuadPoints; qx++) {
249                         if (do_fit) {
250                                 for (int ox=0; ox < iOutcomes; ox++) {
251                                         ll += weight[ox] * oProb[ox];
252                                 }
253                         }
254                         if (do_deriv) {
255                                 (*dLL1)(spec, iparam, wherePrep + qx * maxDims, weight, myDeriv);
256                         }
257                         weight += iOutcomes;
258                         oProb += iOutcomes;
259                 }
260         }
261
262         size_t excluded = 0;
263
264         if (do_deriv) {
265                 double *deriv0 = thrDeriv.data();
266
267                 int perThread = itemParam->cols * state->itemDerivPadSize;
268                 for (int th=1; th < Global->numThreads; th++) {
269                         double *thrD = thrDeriv.data() + th * perThread;
270                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
271                 }
272
273                 for (size_t ix=0; ix < numItems; ix++) {
274                         const double *spec = itemSpec[ix];
275                         int id = spec[RPF_ISpecID];
276                         double *iparam = omxMatrixColumn(itemParam, ix);
277                         double *pad = deriv0 + ix * state->itemDerivPadSize;
278                         (*rpf_model[id].dLL2)(spec, iparam, pad);
279                 }
280
281                 int numFreeParams = int(state->numItemParam);
282                 int numParams = itemParam->cols * state->itemDerivPadSize;
283                 for (int ox=0; ox < numParams; ox++) {
284                         int to = state->paramMap[ox];
285                         if (to == -1) continue;
286
287                         // Need to check because this can happen if
288                         // lbounds/ubounds are not set appropriately.
289                         if (0 && !isfinite(deriv0[ox])) {
290                                 int item = ox / itemParam->rows;
291                                 mxLog("item parameters:\n");
292                                 const double *spec = itemSpec[item];
293                                 int id = spec[RPF_ISpecID];
294                                 int numParam = (*rpf_model[id].numParam)(spec);
295                                 double *iparam = omxMatrixColumn(itemParam, item);
296                                 pda(iparam, numParam, 1);
297                                 // Perhaps bounds can be pulled in from librpf? TODO
298                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
299                                       ox, item, deriv0[ox]);
300                         }
301
302                         if (to < numFreeParams) {
303                                 if (want & FF_COMPUTE_GRADIENT) {
304                                         fc->grad[to] += deriv0[ox];
305                                 }
306                         } else {
307                                 if (want & FF_COMPUTE_HESSIAN) {
308                                         int Hto = to - numFreeParams;
309                                         fc->hess[Hto] += deriv0[ox];
310                                 }
311                         }
312                 }
313
314                 if (want & FF_COMPUTE_IHESSIAN) {
315                         for (size_t ix=0; ix < numItems; ix++) {
316                                 const double *spec = itemSpec[ix];
317                                 int id = spec[RPF_ISpecID];
318                                 int iParams = (*rpf_model[id].numParam)(spec);
319                                 double *pad = deriv0 + ix * state->itemDerivPadSize + iParams;
320                                 int *mask = state->itemParamFree.data() + ix * itemParam->rows;
321                                 double stress;
322                                 omxApproxInvertPackedPosDefTriangular(iParams, mask, pad, &stress);
323                                 if (stress) ++excluded;
324                         }
325
326                         for (int ox=0; ox < numParams; ox++) {
327                                 int to = state->paramMap[ox];
328                                 if (to == -1) continue;
329                                 if (to >= numFreeParams) {
330                                         int Hto = to - numFreeParams;
331                                         fc->ihess[Hto] += deriv0[ox] / state->ihessDivisor[ox];
332                                 }
333                         }
334
335                 }
336         }
337
338         if (excluded && estate->verbose >= 1) {
339                 mxLog("%s: Hessian not positive definite for %lu/%lu items",
340                       oo->matrix->name, excluded, numItems);
341         }
342         if (excluded > numItems/2) {
343                 // maybe not fatal, but investigation needed
344                 omxRaiseErrorf(globalState, "Hessian not positive definite for %lu/%lu items",
345                                excluded, numItems);
346         }
347
348         return -ll;
349 }
350
351 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
352 {}
353
354 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
355 {
356         BA81FitState *state = (BA81FitState*) oo->argStruct;
357         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
358         std::vector<int> &latentMap = state->latentMap;
359         std::vector<double> &ElatentMean = estate->ElatentMean;
360         std::vector<double> &ElatentCov = estate->ElatentCov;
361         int maxAbilities = estate->maxAbilities;
362
363         if (!estate->Qpoint.size()) return; // if evaluating fit without estimating model
364
365         // can use Ramsay for these parameters without messing up SEM SEs? TODO
366         fc->changedEstimates = true;
367
368         for (int a1 = 0; a1 < maxAbilities; ++a1) {
369                 if (latentMap[a1] >= 0) {
370                         int to = latentMap[a1];
371                         fc->est[to] = ElatentMean[a1];
372                 }
373
374                 for (int a2 = 0; a2 <= a1; ++a2) {
375                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
376                         if (to < 0) continue;
377                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
378                 }
379         }
380
381         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
382 }
383
384 static double
385 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
386 {
387         BA81FitState *state = (BA81FitState*) oo->argStruct;
388         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
389
390         if (estate->type == EXPECTATION_AUGMENTED) {
391                 if (!state->haveItemMap) buildItemParamMap(oo, fc);
392
393                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
394
395                 if (want & FF_COMPUTE_PARAMFLAVOR) {
396                         for (size_t px=0; px < state->numItemParam; ++px) {
397                                 if (state->paramFlavor[px] < 0) continue;
398                                 fc->flavor[px] = state->paramFlavor[px];
399                         }
400                         return 0;
401                 }
402
403                 if (want & FF_COMPUTE_PREOPTIMIZE) {
404                         omxExpectationCompute(oo->expectation, NULL);
405                         // schilling_bock_2005_rescale(oo, fc); seems counterproductive
406                         return 0;
407                 }
408
409                 double got = ba81ComputeEMFit(oo, want, fc);
410                 return got;
411         } else if (estate->type == EXPECTATION_OBSERVED) {
412                 if (!state->haveLatentMap) buildLatentParamMap(oo, fc);
413
414                 if (want & FF_COMPUTE_PREOPTIMIZE) {
415                         if (state->freeLatents) {
416                                 setLatentStartingValues(oo, fc);
417                                 omxExpectationCompute(oo->expectation, NULL);
418                         }
419                         return 0;
420                 }
421
422                 if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
423                         warning("%s: Derivs are not available for latent distribution parameters", oo->matrix->name);
424                 }
425
426                 if (want & FF_COMPUTE_MAXABSCHANGE) {
427                         double mac = std::max(omxMaxAbsDiff(state->itemParam, estate->itemParam),
428                                               omxMaxAbsDiff(state->latentMean, estate->latentMeanOut));
429                         fc->mac = std::max(mac, omxMaxAbsDiff(state->latentCov, estate->latentCovOut));
430                         state->copyEstimates(estate);
431                 }
432
433                 if (want & FF_COMPUTE_FIT) {
434                         omxExpectationCompute(oo->expectation, NULL);
435
436                         double *patternLik = estate->patternLik;
437                         int *numIdentical = estate->numIdentical;
438                         int numUnique = estate->numUnique;
439                         estate->excludedPatterns = 0;
440                         const double LogLargest = estate->LogLargestDouble;
441                         double got = 0;
442 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:got)
443                         for (int ux=0; ux < numUnique; ux++) {
444                                 if (!validPatternLik(estate, patternLik[ux])) {
445 #pragma omp atomic
446                                         ++estate->excludedPatterns;
447                                         // somehow indicate that this -2LL is provisional TODO
448                                         continue;
449                                 }
450                                 got += numIdentical[ux] * (log(patternLik[ux]) - LogLargest);
451                         }
452                         if (estate->verbose) mxLog("%s: fit (%d/%d excluded)",
453                                                    oo->matrix->name, estate->excludedPatterns, numUnique);
454                         //mxLog("fit %.4f", -2 * got);
455                         return -2 * got;
456                 }
457
458                 return 0;
459         } else {
460                 error("Confused");
461         }
462 }
463
464 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
465 {
466         if (!want) return;
467         double got = ba81ComputeFit(oo, want, fc);
468         if (got) oo->matrix->data[0] = got;
469 }
470
471 BA81FitState::~BA81FitState()
472 {
473         omxFreeAllMatrixData(itemParam);
474         omxFreeAllMatrixData(latentMean);
475         omxFreeAllMatrixData(latentCov);
476 }
477
478 static void ba81Destroy(omxFitFunction *oo) {
479         BA81FitState *state = (BA81FitState *) oo->argStruct;
480         delete state;
481 }
482
483 void omxInitFitFunctionBA81(omxFitFunction* oo)
484 {
485         if (!oo->argStruct) { // ugh!
486                 BA81FitState *state = new BA81FitState;
487                 oo->argStruct = state;
488         }
489
490         BA81FitState *state = (BA81FitState*) oo->argStruct;
491
492         omxExpectation *expectation = oo->expectation;
493         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
494
495         //newObj->data = oo->expectation->data;
496
497         oo->computeFun = ba81Compute;
498         oo->setVarGroup = ba81SetFreeVarGroup;
499         oo->destructFun = ba81Destroy;
500         oo->gradientAvailable = TRUE;
501         oo->hessianAvailable = TRUE;
502         oo->parametersHaveFlavor = TRUE;
503
504         int maxParam = estate->itemParam->rows;
505         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
506
507         int numItems = estate->itemParam->cols;
508         for (int ix=0; ix < numItems; ix++) {
509                 const double *spec = estate->itemSpec[ix];
510                 int id = spec[RPF_ISpecID];
511                 if (id < 0 || id >= rpf_numModels) {
512                         error("ItemSpec %d has unknown item model %d", ix, id);
513                 }
514         }
515
516         
517
518         state->itemParam = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
519         state->latentMean = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
520         state->latentCov = omxInitMatrix(NULL, 0, 0, TRUE, globalState);
521         state->copyEstimates(estate);
522 }