OpenMP tweaks
[openmx:openmx.git] / src / omxFitFunctionBA81.cpp
1 /*
2   Copyright 2012-2013 Joshua Nathaniel Pritikin and contributors
3
4   This is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "omxFitFunction.h"
19 #include "omxExpectationBA81.h"
20 #include "omxOpenmpWrap.h"
21 #include "libifa-rpf.h"
22
23 struct BA81FitState {
24
25         bool haveLatentMap;
26         std::vector<int> latentMap;
27
28         bool haveItemMap;
29         int itemDerivPadSize;     // maxParam + maxParam*(1+maxParam)/2
30         std::vector<int> paramFlavor;        // freeParam
31         std::vector<int> paramMap;           // itemParam->cols * itemDerivPadSize -> index of free parameter
32         std::vector<int> paramLocations;     // param# -> count of appearances in ItemParam
33         std::vector<int> itemParamFree;      // itemParam->cols * itemParam->rows
34         std::vector<int> ihessDivisor;       // freeParam * freeParam
35
36         omxMatrix *cholCov;
37         int choleskyError;
38         double *tmpLatentMean;    // maxDims
39         double *tmpLatentCov;     // maxDims * maxDims ; only lower triangle is used
40         omxMatrix *icov;          // inverse covariance matrix
41
42         std::vector< FreeVarGroup* > varGroups;
43         size_t numItemParam;
44
45         BA81FitState();
46         ~BA81FitState();
47 };
48
49 BA81FitState::BA81FitState()
50 {
51         tmpLatentMean = NULL;
52         tmpLatentCov = NULL;
53         haveItemMap = false;
54         haveLatentMap = false;
55 }
56
57 static void buildLatentParamMap(omxFitFunction* oo, FitContext *fc)
58 {
59         FreeVarGroup *fvg = fc->varGroup;
60         BA81FitState *state = (BA81FitState *) oo->argStruct;
61         std::vector<int> &latentMap = state->latentMap;
62         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
63         int meanNum = estate->latentMeanOut->matrixNumber;
64         int covNum = estate->latentCovOut->matrixNumber;
65         int itemNum = estate->itemParam->matrixNumber;
66         int maxAbilities = estate->maxAbilities;
67         int numLatents = maxAbilities + triangleLoc1(maxAbilities);
68
69         latentMap.assign(numLatents, -1);
70
71         int numParam = int(fvg->vars.size());
72         for (int px=0; px < numParam; px++) {
73                 omxFreeVar *fv = fvg->vars[px];
74                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
75                         omxFreeVarLocation *loc = &fv->locations[lx];
76                         int matNum = ~loc->matrix;
77                         if (matNum == meanNum) {
78                                 latentMap[loc->row + loc->col] = px;
79                         } else if (matNum == covNum) {
80                                 int a1 = loc->row;
81                                 int a2 = loc->col;
82                                 if (a1 < a2) std::swap(a1, a2);
83                                 int cell = maxAbilities + triangleLoc1(a1) + a2;
84                                 if (latentMap[cell] == -1) {
85                                         latentMap[cell] = px;
86
87                                         if (a1 == a2 && fv->lbound == NEG_INF) {
88                                                 fv->lbound = 1e-6;  // variance must be positive
89                                                 if (fc->est[px] < fv->lbound) {
90                                                         error("Starting value for variance %s is negative", fv->name);
91                                                 }
92                                         }
93                                 } else if (latentMap[cell] != px) {
94                                         // doesn't work for multigroup constraints TODO
95                                         error("In covariance matrix, %s and %s must be constrained equal to preserve symmetry",
96                                               fvg->vars[latentMap[cell]]->name, fv->name);
97                                 }
98                         } else if (matNum == itemNum) {
99                                 omxRaiseErrorf(globalState, "The fitfunction free.set should consist of "
100                                                "latent distribution parameters, excluding item parameters");
101                         }
102                 }
103         }
104         state->haveLatentMap = TRUE;
105 }
106
107 static void buildItemParamMap(omxFitFunction* oo, FitContext *fc)
108 {
109         FreeVarGroup *fvg = fc->varGroup;
110         BA81FitState *state = (BA81FitState *) oo->argStruct;
111         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
112         omxMatrix *itemParam = estate->itemParam;
113         int size = itemParam->cols * state->itemDerivPadSize;
114         state->paramMap.assign(size, -1);  // matrix location to free param index
115         state->itemParamFree.assign(itemParam->rows * itemParam->cols, FALSE);
116
117         size_t numFreeParams = state->numItemParam = fvg->vars.size();
118         state->paramLocations.assign(numFreeParams, 0);
119         state->paramFlavor.assign(numFreeParams, -1);
120
121         for (size_t px=0; px < numFreeParams; px++) {
122                 omxFreeVar *fv = fvg->vars[px];
123                 state->paramLocations[px] = int(fv->locations.size());
124                 for (size_t lx=0; lx < fv->locations.size(); lx++) {
125                         omxFreeVarLocation *loc = &fv->locations[lx];
126                         int matNum = ~loc->matrix;
127                         // prohibit mean & cov TODO
128                         if (matNum == itemParam->matrixNumber) {
129                                 int at = loc->col * state->itemDerivPadSize + loc->row;
130                                 state->paramMap[at] = px;
131                                 state->itemParamFree[loc->col * itemParam->rows + loc->row] = TRUE;
132
133                                 const double *spec = estate->itemSpec[loc->col];
134                                 int id = spec[RPF_ISpecID];
135                                 int flavor;
136                                 double upper, lower;
137                                 (*rpf_model[id].paramInfo)(spec, loc->row, &flavor, &upper, &lower);
138                                 if (state->paramFlavor[px] < 0) {
139                                         state->paramFlavor[px] = flavor;
140                                 } else if (state->paramFlavor[px] != flavor) {
141                                         error("Cannot equate %s with %s[%d,%d]", fv->name,
142                                               itemParam->name, loc->row, loc->col);
143                                 }
144                                 if (fv->lbound == NEG_INF && isfinite(lower)) {
145                                         fv->lbound = lower;
146                                         if (fc->est[px] < fv->lbound) {
147                                                 error("Starting value %s %f less than lower bound %f",
148                                                       fv->name, fc->est[px], lower);
149                                         }
150                                 }
151                                 if (fv->ubound == INF && isfinite(upper)) {
152                                         fv->ubound = upper;
153                                         if (fc->est[px] > fv->ubound) {
154                                                 error("Starting value %s %f greater than upper bound %f",
155                                                       fv->name, fc->est[px], upper);
156                                         }
157                                 }
158                         }
159                 }
160         }
161
162         state->ihessDivisor.resize(size);
163
164         for (int cx=0; cx < itemParam->cols; ++cx) {
165                 const double *spec = estate->itemSpec[cx];
166                 int id = spec[RPF_ISpecID];
167                 int numParam = (*rpf_model[id].numParam)(spec);
168
169                 for (int p1=0; p1 < numParam; p1++) {
170                         int at1 = state->paramMap[cx * state->itemDerivPadSize + p1];
171                         if (at1 < 0) continue;
172
173                         for (int p2=0; p2 <= p1; p2++) {
174                                 int at2 = state->paramMap[cx * state->itemDerivPadSize + p2];
175                                 if (at2 < 0) continue;
176
177                                 if (at1 < at2) std::swap(at1, at2);  // lower triangle
178
179                                 //mxLog("Item %d param(%d,%d) -> H[%d,%d]", cx, p1, p2, at1, at2);
180                                 int at = cx * state->itemDerivPadSize + numParam + triangleLoc1(p1) + p2;
181                                 state->paramMap[at] = numFreeParams + at1 * numFreeParams + at2;
182
183                                 state->ihessDivisor[at] =
184                                         state->paramLocations[at1] * state->paramLocations[at2];
185                         }
186                 }
187         }
188
189         state->haveItemMap = TRUE;
190         //pia(state->paramMap.data(), state->itemDerivPadSize, itemParam->cols);
191 }
192
193 // Depends on item parameters, but not latent distribution
194 OMXINLINE static void
195 computeRPF(BA81Expect *state, omxMatrix *itemParam, const int *quad, double *out)
196 {
197         omxMatrix *design = state->design;
198         int maxDims = state->maxDims;
199         size_t numItems = state->itemSpec.size();
200
201         double theta[maxDims];
202         pointToWhere(state, quad, theta, maxDims);
203
204         for (size_t ix=0; ix < numItems; ix++) {
205                 const double *spec = state->itemSpec[ix];
206                 int id = spec[RPF_ISpecID];
207                 int dims = spec[RPF_ISpecDims];
208                 double ptheta[dims];
209
210                 for (int dx=0; dx < dims; dx++) {
211                         int ability = (int)omxMatrixElement(design, dx, ix) - 1;
212                         if (ability >= maxDims) ability = maxDims-1;
213                         ptheta[dx] = theta[ability];
214                 }
215
216                 double *iparam = omxMatrixColumn(itemParam, ix);
217                 (*rpf_model[id].logprob)(spec, iparam, ptheta, out);
218 #if 0
219                 for (int ox=0; ox < state->itemOutcomes[ix]; ox++) {
220                         if (!isfinite(out[ox]) || out[ox] > 0) {
221                                 mxLog("item param");
222                                 pda(iparam, itemParam->rows, 1);
223                                 mxLog("where");
224                                 pda(ptheta, dims, 1);
225                                 error("RPF returned %20.20f", out[ox]);
226                         }
227                 }
228 #endif
229                 out += state->itemOutcomes[ix];
230         }
231 }
232
233 OMXINLINE static double
234 ba81Fit1Ordinate(omxFitFunction* oo, const long qx, const int *quad,
235                  int want, double *myDeriv)
236 {
237         BA81FitState *state = (BA81FitState*) oo->argStruct;
238         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
239         omxMatrix *itemParam = estate->itemParam;
240         int numItems = itemParam->cols;
241         int maxDims = estate->maxDims;
242         int do_fit = want & FF_COMPUTE_FIT;
243         int do_deriv = want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN);
244
245         double where[maxDims];
246         pointToWhere(estate, quad, where, maxDims);
247
248         double *outcomeProb = NULL;
249         if (do_fit) {
250                 outcomeProb = Realloc(NULL, estate->totalOutcomes, double); // avoid malloc/free? TODO
251                 computeRPF(estate, itemParam, quad, outcomeProb);
252         }
253
254         double thr_ll = 0;
255         const double *oProb = outcomeProb;
256         int outcomeBase = 0;
257         for (int ix=0; ix < numItems; ix++) {
258                 const double *spec = estate->itemSpec[ix];
259                 int id = spec[RPF_ISpecID];
260                 int iOutcomes = estate->itemOutcomes[ix];
261                 const double *weight = estate->expected + outcomeBase * estate->totalQuadPoints + iOutcomes * qx;
262
263                 if (do_fit) {
264                         for (int ox=0; ox < iOutcomes; ox++) {
265                                 double got = weight[ox] * oProb[ox];
266                                 thr_ll += got;
267                         }
268                 }
269
270                 if (do_deriv) {
271                         double *iparam = omxMatrixColumn(itemParam, ix);
272                         double *pad = myDeriv + ix * state->itemDerivPadSize;
273                         (*rpf_model[id].dLL1)(spec, iparam, where, weight, pad);
274                 }
275                 oProb += iOutcomes;
276                 outcomeBase += iOutcomes;
277         }
278
279         Free(outcomeProb);
280
281         return thr_ll;
282 }
283
284 static double
285 ba81ComputeMFit1(omxFitFunction* oo, int want, FitContext *fc)
286 {
287         BA81FitState *state = (BA81FitState*) oo->argStruct;
288         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
289         omxMatrix *customPrior = estate->customPrior;
290         omxMatrix *itemParam = estate->itemParam;
291         std::vector<const double*> &itemSpec = estate->itemSpec;
292         int maxDims = estate->maxDims;
293
294         if (estate->verbose) mxLog("%s: em.fit(%d)", oo->matrix->name, want);
295
296         double *thrDeriv = Calloc(itemParam->cols * state->itemDerivPadSize * Global->numThreads, double);
297
298         double ll = 0;
299         if (customPrior) {
300                 omxRecompute(customPrior);
301                 ll = customPrior->data[0];
302                 // need deriv adjustment TODO
303         }
304
305         if (!isfinite(ll)) {
306                 omxPrint(itemParam, "item param");
307                 error("Bayesian prior returned %g; do you need to add a lbound/ubound?", ll);
308         }
309
310 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:ll)
311         for (long qx=0; qx < estate->totalQuadPoints; qx++) {
312                 int quad[maxDims];
313                 decodeLocation(qx, maxDims, estate->quadGridSize, quad);
314                 double *myDeriv = thrDeriv + itemParam->cols * state->itemDerivPadSize * omx_absolute_thread_num();
315                 ll += ba81Fit1Ordinate(oo, qx, quad, want, myDeriv);
316         }
317
318         int excluded = 0;
319         int numItems = itemParam->cols;
320
321         if (want & (FF_COMPUTE_GRADIENT | FF_COMPUTE_HESSIAN | FF_COMPUTE_IHESSIAN)) {
322                 double *deriv0 = thrDeriv;
323
324                 int perThread = itemParam->cols * state->itemDerivPadSize;
325                 for (int th=1; th < Global->numThreads; th++) {
326                         double *thrD = thrDeriv + th * perThread;
327                         for (int ox=0; ox < perThread; ox++) deriv0[ox] += thrD[ox];
328                 }
329
330                 for (int ix=0; ix < numItems; ix++) {
331                         const double *spec = itemSpec[ix];
332                         int id = spec[RPF_ISpecID];
333                         double *iparam = omxMatrixColumn(itemParam, ix);
334                         double *pad = deriv0 + ix * state->itemDerivPadSize;
335                         (*rpf_model[id].dLL2)(spec, iparam, pad);
336                 }
337
338                 int numFreeParams = int(state->numItemParam);
339                 int numParams = itemParam->cols * state->itemDerivPadSize;
340                 for (int ox=0; ox < numParams; ox++) {
341                         int to = state->paramMap[ox];
342                         if (to == -1) continue;
343
344                         // Need to check because this can happen if
345                         // lbounds/ubounds are not set appropriately.
346                         if (0 && !isfinite(deriv0[ox])) {
347                                 int item = ox / itemParam->rows;
348                                 mxLog("item parameters:\n");
349                                 const double *spec = itemSpec[item];
350                                 int id = spec[RPF_ISpecID];
351                                 int numParam = (*rpf_model[id].numParam)(spec);
352                                 double *iparam = omxMatrixColumn(itemParam, item);
353                                 pda(iparam, numParam, 1);
354                                 // Perhaps bounds can be pulled in from librpf? TODO
355                                 error("Deriv %d for item %d is %f; are you missing a lbound/ubound?",
356                                       ox, item, deriv0[ox]);
357                         }
358
359                         if (to < numFreeParams) {
360                                 if (want & FF_COMPUTE_GRADIENT) {
361                                         fc->grad[to] += deriv0[ox];
362                                 }
363                         } else {
364                                 if (want & FF_COMPUTE_HESSIAN) {
365                                         int Hto = to - numFreeParams;
366                                         fc->hess[Hto] += deriv0[ox];
367                                 }
368                         }
369                 }
370
371                 if (want & FF_COMPUTE_IHESSIAN) {
372                         for (int ix=0; ix < numItems; ix++) {
373                                 const double *spec = itemSpec[ix];
374                                 int id = spec[RPF_ISpecID];
375                                 int iParams = (*rpf_model[id].numParam)(spec);
376                                 double *pad = deriv0 + ix * state->itemDerivPadSize + iParams;
377                                 int *mask = state->itemParamFree.data() + ix * itemParam->rows;
378                                 double stress;
379                                 omxApproxInvertPackedPosDefTriangular(iParams, mask, pad, &stress);
380                                 if (stress) ++excluded;
381                         }
382
383                         for (int ox=0; ox < numParams; ox++) {
384                                 int to = state->paramMap[ox];
385                                 if (to == -1) continue;
386                                 if (to >= numFreeParams) {
387                                         int Hto = to - numFreeParams;
388                                         fc->ihess[Hto] += deriv0[ox] / state->ihessDivisor[ox];
389                                 }
390                         }
391
392                 }
393         }
394
395         if (excluded && estate->verbose >= 1) {
396                 mxLog("%s: Hessian not positive definite for %d/%d items",
397                       oo->matrix->name, excluded, numItems);
398         }
399         if (excluded > numItems/2) {
400                 // maybe not fatal, but investigation needed
401                 omxRaiseErrorf(globalState, "Hessian not positive definite for %d/%d items",
402                                excluded, numItems);
403         }
404
405         Free(thrDeriv);
406
407         return -ll;
408 }
409
410 void ba81SetFreeVarGroup(omxFitFunction *oo, FreeVarGroup *fvg)
411 {}
412
413 static void setLatentStartingValues(omxFitFunction *oo, FitContext *fc)
414 {
415         BA81FitState *state = (BA81FitState*) oo->argStruct;
416         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
417         std::vector<int> &latentMap = state->latentMap;
418         std::vector<double> &ElatentMean = estate->ElatentMean;
419         std::vector<double> &ElatentCov = estate->ElatentCov;
420         int maxAbilities = estate->maxAbilities;
421
422         for (int a1 = 0; a1 < maxAbilities; ++a1) {
423                 if (latentMap[a1] >= 0) {
424                         int to = latentMap[a1];
425                         fc->est[to] = ElatentMean[a1];
426                 }
427
428                 for (int a2 = 0; a2 <= a1; ++a2) {
429                         int to = latentMap[maxAbilities + triangleLoc1(a1) + a2];
430                         if (to < 0) continue;
431                         fc->est[to] = ElatentCov[a1 * maxAbilities + a2];
432                 }
433         }
434
435         //fc->log("setLatentStartingValues", FF_COMPUTE_ESTIMATE);
436 }
437
438 static double
439 ba81ComputeFit(omxFitFunction* oo, int want, FitContext *fc)
440 {
441         BA81FitState *state = (BA81FitState*) oo->argStruct;
442         BA81Expect *estate = (BA81Expect*) oo->expectation->argStruct;
443
444         if (want & FF_COMPUTE_POSTOPTIMIZE) return 0;
445
446         if (estate->type == EXPECTATION_AUGMENTED) {
447                 if (!state->haveItemMap) buildItemParamMap(oo, fc);
448
449                 if (state->numItemParam != fc->varGroup->vars.size()) error("mismatch"); // remove TODO
450
451                 if (want & FF_COMPUTE_PARAMFLAVOR) {
452                         for (size_t px=0; px < state->numItemParam; ++px) {
453                                 if (state->paramFlavor[px] < 0) continue;
454                                 fc->flavor[px] = state->paramFlavor[px];
455                         }
456                         return 0;
457                 }
458
459                 if (want & FF_COMPUTE_PREOPTIMIZE) {
460                         // schilling_bock_2005_rescale(oo, fc); seems counterproductive
461                         return 0;
462                 }
463
464                 double got = ba81ComputeMFit1(oo, want, fc);
465                 return got;
466         } else if (estate->type == EXPECTATION_OBSERVED) {
467                 if (!state->haveLatentMap) buildLatentParamMap(oo, fc);
468
469                 if (want & FF_COMPUTE_PREOPTIMIZE) {
470                         setLatentStartingValues(oo, fc);
471                         return 0;
472                 }
473
474                 omxExpectationCompute(oo->expectation, NULL);
475
476                 if (want & (FF_COMPUTE_GRADIENT|FF_COMPUTE_HESSIAN)) {
477                         warning("%s: Derivs are not available for latent distribution parameters", oo->matrix->name);
478                 }
479
480                 if (want & FF_COMPUTE_FIT) {
481                         double *patternLik = estate->patternLik;
482                         int *numIdentical = estate->numIdentical;
483                         int numUnique = estate->numUnique;
484                         estate->excludedPatterns = 0;
485                         const double LogLargest = estate->LogLargestDouble;
486                         double got = 0;
487 #pragma omp parallel for num_threads(Global->numThreads) reduction(+:got)
488                         for (int ux=0; ux < numUnique; ux++) {
489                                 if (!validPatternLik(estate, patternLik[ux])) {
490 #pragma omp atomic
491                                         ++estate->excludedPatterns;
492                                         // somehow indicate that this -2LL is provisional TODO
493                                         continue;
494                                 }
495                                 got += numIdentical[ux] * (log(patternLik[ux]) - LogLargest);
496                         }
497                         if (estate->verbose) mxLog("%s: fit (%d/%d excluded)",
498                                                    oo->matrix->name, estate->excludedPatterns, numUnique);
499                         //mxLog("fit %.4f", -2 * got);
500                         return -2 * got;
501                 }
502
503                 // if (want & FF_COMPUTE_POSTOPTIMIZE)  discard lxk cache? TODO
504
505                 return 0;
506         } else {
507                 error("Confused");
508         }
509 }
510
511 static void ba81Compute(omxFitFunction *oo, int want, FitContext *fc)
512 {
513         if (!want) return;
514         double got = ba81ComputeFit(oo, want, fc);
515         if (got) oo->matrix->data[0] = got;
516 }
517
518 BA81FitState::~BA81FitState()
519 {
520         Free(tmpLatentMean);
521         Free(tmpLatentCov);
522         omxFreeAllMatrixData(icov);
523         omxFreeAllMatrixData(cholCov);
524 }
525
526 static void ba81Destroy(omxFitFunction *oo) {
527         BA81FitState *state = (BA81FitState *) oo->argStruct;
528         delete state;
529 }
530
531 void omxInitFitFunctionBA81(omxFitFunction* oo)
532 {
533         if (!oo->argStruct) { // ugh!
534                 BA81FitState *state = new BA81FitState;
535                 oo->argStruct = state;
536         }
537
538         BA81FitState *state = (BA81FitState*) oo->argStruct;
539
540         omxExpectation *expectation = oo->expectation;
541         BA81Expect *estate = (BA81Expect*) expectation->argStruct;
542
543         //newObj->data = oo->expectation->data;
544
545         oo->computeFun = ba81Compute;
546         oo->setVarGroup = ba81SetFreeVarGroup;
547         oo->destructFun = ba81Destroy;
548         oo->gradientAvailable = TRUE;
549         oo->hessianAvailable = TRUE;
550         oo->parametersHaveFlavor = TRUE;
551
552         int maxParam = estate->itemParam->rows;
553         state->itemDerivPadSize = maxParam + triangleLoc1(maxParam);
554
555         int maxAbilities = estate->maxAbilities;
556
557         state->tmpLatentMean = Realloc(NULL, estate->maxDims, double);
558         state->tmpLatentCov = Realloc(NULL, estate->maxDims * estate->maxDims, double);
559
560         int numItems = estate->itemParam->cols;
561         for (int ix=0; ix < numItems; ix++) {
562                 const double *spec = estate->itemSpec[ix];
563                 int id = spec[RPF_ISpecID];
564                 if (id < 0 || id >= rpf_numModels) {
565                         error("ItemSpec %d has unknown item model %d", ix, id);
566                 }
567         }
568
569         state->icov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
570         state->cholCov = omxInitMatrix(NULL, maxAbilities, maxAbilities, TRUE, globalState);
571 }