Remove unused argument from omxResizeMatrix
[openmx:openmx.git] / src / omxData.cpp
1 /*
2  *  Copyright 2007-2014 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 /***********************************************************
18  *
19  *  omxData.cc
20  *
21  *  Created: Timothy R. Brick   Date: 2009-07-15
22  *
23  *      Contains code for the omxData class
24  *   omxData objects hold data in whatever form it takes
25  *
26  **********************************************************/
27 #include "omxData.h"
28 #include "glue.h"
29 #include "omxState.h"
30
31 static omxData* omxInitData()
32 {
33         omxData *od = Calloc(1, omxData);
34
35         globalState->dataList.push_back(od);
36
37         return od;
38
39 }
40
41 omxData* omxDataLookupFromState(SEXP dataObject, omxState* state) {
42         int dataIdx = INTEGER(dataObject)[0];
43
44         return state->dataList[dataIdx];
45 }
46
47 static void newDataDynamic(SEXP dataObject, omxData *od)
48 {
49         SEXP dataLoc, dataVal;
50         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("type")));
51         Rf_protect(dataVal = STRING_ELT(dataLoc,0));
52         od->_type = CHAR(dataVal);
53
54         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("expectation")));
55         od->expectation = omxExpectationFromIndex(INTEGER(dataLoc)[0], globalState);
56 }
57
58 static void newDataStatic(SEXP dataObject, omxData *od)
59 {
60         omxState *state = globalState;
61         SEXP dataLoc, dataVal;
62         int numCols;
63
64         // PARSE MxData Structure
65         if(OMX_DEBUG) {mxLog("Processing Data Type.");}
66         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("type")));
67         Rf_protect(dataVal = STRING_ELT(dataLoc,0));
68         od->_type = CHAR(dataVal);
69         if(OMX_DEBUG) {mxLog("Element is type %s.", od->_type);}
70
71         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("observed")));
72         if(OMX_DEBUG) {mxLog("Processing Data Elements.");}
73         if (Rf_isFrame(dataLoc)) {
74                 if(OMX_DEBUG) {mxLog("Data is a frame.");}
75                 // Process Data Frame into Columns
76                 od->cols = Rf_length(dataLoc);
77                 if(OMX_DEBUG) {mxLog("Data has %d columns.", od->cols);}
78                 numCols = od->cols;
79                 od->realData = (double**) R_alloc(numCols, sizeof(double*));
80                 OMXZERO(od->realData, numCols);
81                 od->intData = (int**) R_alloc(numCols, sizeof(int*));
82                 OMXZERO(od->intData, numCols);
83                 od->location = (int*) R_alloc(numCols, sizeof(int));
84                 SEXP colnames;
85                 Rf_protect(colnames = Rf_getAttrib(dataLoc, R_NamesSymbol));
86                 for(int j = 0; j < numCols; j++) {
87                         SEXP rcol;
88                         Rf_protect(rcol = VECTOR_ELT(dataLoc, j));
89                         if(Rf_isFactor(rcol)) {
90                                 if (Rf_isUnordered(rcol)) {
91                                         Rf_warning("Data[%d] '%s' must be an ordered factor. Please use mxFactor()",
92                                                 j+1, CHAR(STRING_ELT(colnames, j)));
93                                 }
94                                 if(OMX_DEBUG) {mxLog("Column %d is a factor.", j);}
95                                 od->intData[j] = INTEGER(rcol);
96                                 od->location[j] = ~j;
97                                 od->numFactor++;
98                         } else if (Rf_isInteger(rcol)) {
99                                 Rf_error("Internal Rf_error: Column %d is in integer format.", j);
100                         } else {
101                                 if(OMX_DEBUG) {mxLog("Column %d is a numeric.", j);}
102                                 od->realData[j] = REAL(rcol);
103                                 od->location[j] = j;
104                                 od->numNumeric++;
105                         }
106                 }
107                 od->rows = Rf_length(VECTOR_ELT(dataLoc, 0));
108                 if(OMX_DEBUG) {mxLog("And %d rows.", od->rows);}
109         } else {
110                 if(OMX_DEBUG) {mxLog("Data contains a matrix.");}
111                 od->dataMat = omxNewMatrixFromRPrimitive(dataLoc, state, 0, 0);
112                 
113                 if (od->dataMat->colMajor && strncmp(od->_type, "raw", 3) == 0) { 
114                         omxToggleRowColumnMajor(od->dataMat);
115                 }
116                 od->cols = od->dataMat->cols;
117                 od->rows = od->dataMat->rows;
118                 od->numNumeric = od->cols;
119         }
120
121         if(OMX_DEBUG) {mxLog("Processing Means Matrix.");}
122         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("means")));
123         od->meansMat = omxNewMatrixFromRPrimitive(dataLoc, state, 0, 0);
124         if(od->meansMat->rows == 1 && od->meansMat->cols == 1 && 
125            (!R_finite(omxMatrixElement(od->meansMat, 0, 0)) ||
126             !std::isfinite(omxMatrixElement(od->meansMat, 0, 0)))) {
127                 omxFreeMatrix(od->meansMat); // Clear just-allocated memory.
128                 od->meansMat = NULL;  // 1-by-1 matrix of NAs is a null means matrix.
129                 // FIXME: The above check may cause problems for dynamic data if the means
130                 //          originally is a 1x1 that has not yet been calculated.  This should be
131                 //          adjusted.
132         }
133         
134         if(OMX_DEBUG) {
135                 if(od->meansMat == NULL) {mxLog("No means found.");}
136                 else {omxPrint(od->meansMat, "Means Matrix is:");}
137         }
138
139         if(OMX_DEBUG) {mxLog("Processing Asymptotic Covariance Matrix.");}
140         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("acov")));
141         od->acovMat = omxNewMatrixFromRPrimitive(dataLoc, state, 0, 0);
142         if(od->acovMat->rows == 1 && od->acovMat->cols == 1 && 
143            (!R_finite(omxMatrixElement(od->acovMat, 0, 0)) ||
144             !std::isfinite(omxMatrixElement(od->acovMat, 0, 0)))) {
145                 omxFreeMatrix(od->acovMat); // Clear just-allocated memory.
146                 od->acovMat = NULL;
147         }
148
149         if(OMX_DEBUG) {mxLog("Processing Observed Thresholds Matrix.");}
150         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("thresholds")));
151         od->obsThresholdsMat = omxNewMatrixFromRPrimitive(dataLoc, state, 0, 0);
152         if(od->obsThresholdsMat->rows == 1 && od->obsThresholdsMat->cols == 1 && 
153            (!R_finite(omxMatrixElement(od->obsThresholdsMat, 0, 0)) ||
154             !std::isfinite(omxMatrixElement(od->obsThresholdsMat, 0, 0)))) {
155                 omxFreeMatrix(od->obsThresholdsMat); // Clear just-allocated memory.
156                 od->obsThresholdsMat = NULL;
157         } else {
158         int nCol = od->obsThresholdsMat->cols;
159                 /* Match threshold column names and build ThresholdCols structure */
160                 od->thresholdCols = (omxThresholdColumn*) R_alloc(nCol, sizeof(omxThresholdColumn));
161         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("thresholdColumns")));
162         int *columns = INTEGER(dataLoc);
163         Rf_protect(dataVal = R_do_slot(dataObject, Rf_install("thresholdLevels")));
164         int *levels = INTEGER(dataVal);
165         for(int i = 0; i < od->obsThresholdsMat->cols; i++) {
166             od->thresholdCols[i].matrix = od->obsThresholdsMat;
167             od->thresholdCols[i].column = columns[i];
168             od->thresholdCols[i].numThresholds = levels[i];
169                         od->numFactor++; //N.B. must increment numFactor when data@type=='raw' (above) AND when data@type=='acov' (here)
170                         if(OMX_DEBUG) {
171                                 mxLog("Column %d is ordinal with %d thresholds in threshold column %d.", 
172                                         i, levels[i], columns[i]);
173                         }
174         }
175         }
176
177         if(strncmp(od->_type, "raw", 3) != 0) {
178                 if(OMX_DEBUG) {mxLog("Processing Observation Count.");}
179                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("numObs")));
180                 od->numObs = REAL(dataLoc)[0];
181         } else {
182                 od->numObs = od->rows;
183                 if(OMX_DEBUG) {mxLog("Processing presort metadata.");}
184                 /* For raw data, process sorting metadata. */
185                 // Process unsorted indices:  // TODO: Generate reverse lookup table
186                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("indexVector")));
187                 od->indexVector = INTEGER(dataLoc);
188                 if(Rf_length(dataLoc) == 0 || od->indexVector[0] == R_NaInt) od->indexVector = NULL;
189                 // Process pre-computed identicality checks
190                 if(OMX_DEBUG) {mxLog("Processing definition variable identicality.");}
191                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalDefVars")));
192                 od->identicalDefs = INTEGER(dataLoc);
193                 if(Rf_length(dataLoc) == 0 || od->identicalDefs[0] == R_NaInt) od->identicalDefs = NULL;
194                 if(OMX_DEBUG) {mxLog("Processing missingness identicality.");}
195                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalMissingness")));
196                 od->identicalMissingness = INTEGER(dataLoc);
197                 if(Rf_length(dataLoc) == 0 || od->identicalMissingness[0] == R_NaInt) od->identicalMissingness = NULL;
198                 if(OMX_DEBUG) {mxLog("Processing row identicality.");}
199                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalRows")));
200                 od->identicalRows = INTEGER(dataLoc);
201                 if(Rf_length(dataLoc) == 0 || od->identicalRows[0] == R_NaInt) od->identicalRows = NULL;
202         }
203 }
204
205 omxData* omxNewDataFromMxData(SEXP dataObject)
206 {
207         if(dataObject == NULL) {
208                 Rf_error("Null Data Object detected.  This is an internal Rf_error, and should be reported on the forums.\n");
209         }
210
211         SEXP DataClass;
212         Rf_protect(DataClass = STRING_ELT(Rf_getAttrib(dataObject, Rf_install("class")), 0));
213         const char* dclass = CHAR(DataClass);
214         if(OMX_DEBUG) {mxLog("Initializing %s element", dclass);}
215         omxData* od = omxInitData();
216         if (strcmp(dclass, "MxDataStatic")==0) newDataStatic(dataObject, od);
217         else if (strcmp(dclass, "MxDataDynamic")==0) newDataDynamic(dataObject, od);
218         else Rf_error("Unknown data class %s", dclass);
219         return od;
220 }
221
222 void resetDefinitionVariables(double *oldDefs, int numDefs) {
223         int nextDef;
224
225         for(nextDef = 0; nextDef < numDefs; nextDef++) {
226                 oldDefs[nextDef] = NA_REAL;                                     // Def Vars default to NA
227         }
228
229 }
230
231 void omxFreeData(omxData* od) {
232         omxFreeMatrix(od->dataMat);
233         omxFreeMatrix(od->meansMat);
234         omxFreeMatrix(od->acovMat);
235         omxFreeMatrix(od->obsThresholdsMat);
236         Free(od);
237 }
238
239 double omxDoubleDataElement(omxData *od, int row, int col) {
240         if(od->dataMat != NULL) {
241                 return omxMatrixElement(od->dataMat, row, col);
242         }
243         int location = od->location[col];
244         if(location < 0) {
245                 return (double)(od->intData[~location][row]);
246         } else {
247                 return od->realData[location][row];
248         }
249 }
250
251 int omxIntDataElement(omxData *od, int row, int col) {
252         if(od->dataMat != NULL) {
253                 Rf_error("Use a data frame for factor data");
254         }
255
256         int location = od->location[col];
257         if(location < 0) {
258                 return (od->intData[~location][row]);
259         } else {
260                 return (int)(od->realData[location][row]);
261         }
262 }
263
264 omxMatrix* omxDataCovariance(omxData *od)
265 {
266         if (od->dataMat) return od->dataMat;
267
268         if (od->expectation) {
269                 return omxGetExpectationComponent(od->expectation, NULL, "covariance");
270         }
271
272         // The frontend should ensure matrix storage we we can delete the rest
273         // of this function. TODO
274
275         int numRows = od->rows, numCols = od->cols;
276
277         // should we store the new matrix in od->dataMat? TODO
278         omxMatrix *om = omxInitMatrix(numRows, numCols, TRUE, globalState);
279
280         if(om->rows != numRows || om->cols != numCols) {
281                 omxResizeMatrix(om, numRows, numCols);
282         }
283
284         double dataElement;
285         for(int j = 0; j < numCols; j++) {
286                 for(int k = 0; k < numRows; k++) {
287                         int location = od->location[j];
288                         if(location < 0) {
289                                 dataElement = (double) od->intData[~location][k];
290                         } else {
291                                 dataElement = od->realData[location][k];
292                         }
293                         omxSetMatrixElement(om, k, j, dataElement);
294                 }
295         }
296         return om;
297 }
298
299 omxMatrix* omxDataAcov(omxData *od) {
300         if(od->acovMat) return od->acovMat;
301
302         // Otherwise, we must construct the matrix.
303         int numRows = ( (od->rows)*(od->rows + 1) ) / 2;
304         
305         omxMatrix* om = omxInitMatrix(numRows, numRows, TRUE, globalState);
306         omxCopyMatrix(om, od->acovMat);//omxAliasMatrix(om, od->acovMat); // Could also be done with omxCopyMatrix.
307         return om;
308 }
309
310 unsigned short int omxDataColumnIsFactor(omxData *od, int col) {
311         if(od->dataMat != NULL) return FALSE;
312         if(col <= od->cols) return (od->location[col] < 0);
313
314         Rf_error("Attempted to access column %d of a %d-column data object", col, od->cols);
315         return 0; // not reached
316 }
317
318 omxMatrix* omxDataMeans(omxData *od)
319 {
320         if (od->meansMat) return od->meansMat;
321         if (od->expectation) {
322                 return omxGetExpectationComponent(od->expectation, NULL, "mean");
323         }
324         return NULL;
325 }
326
327 omxThresholdColumn* omxDataThresholds(omxData *od) {
328     return od->thresholdCols;
329 }
330
331 void omxSetContiguousDataColumns(omxContiguousData* contiguous, omxData* data, omxMatrix* colList) {
332
333         contiguous->isContiguous = FALSE;   // Assume not contiguous
334
335         if (data->dataMat == NULL) return; // Data has no matrix elements, so skip.
336
337         omxMatrix* dataMat = data->dataMat;
338         if (dataMat->colMajor) return;      // If data matrix is column-major, there's no continuity
339         
340         int colListLength = colList->cols;              // # of columns in the cov matrix
341         double start = omxVectorElement(colList, 0);    // Data col of first column of the covariance
342         contiguous->start = (int) start;                // That's our starting point.
343         contiguous->length = colListLength;             // And the length is ncol(cov)
344         
345         for(int i = 1; i < colListLength; i++) {        // Make sure that the col list is 
346                 double next = omxVectorElement(colList, i); // contiguously increasing in column number
347                 if (next != (start + i)) return;            // If it isn't, it's not contiguous data
348         }
349         
350         contiguous->isContiguous = TRUE;    // Passed.  This is contiguous.
351 }
352
353 void omxContiguousDataRow(omxData *od, int row, int start, int len, omxMatrix* om) {
354         // TODO: Might be better to combine this with omxDataRow to make a single accessor omxDataRow with a second signature that accepts an omxContiguousData argument.
355         if(row >= od->rows) Rf_error("Invalid row");
356
357         if(om == NULL) Rf_error("Must provide an output matrix");
358         
359         if (om->cols < len) Rf_error("omxContiguousDataRow: output matrix is too small");
360         int numcols = od->cols;
361         omxMatrix* dataMat = od->dataMat;
362         double *dest = om->data;
363         double *source = dataMat->data + row * numcols + start;
364         memcpy(dest, source, sizeof(double) * len);
365 }
366
367 void omxDataRow(omxData *od, int row, omxMatrix* colList, omxMatrix* om) {
368
369         if(colList == NULL || row >= od->rows) Rf_error("Invalid row or colList");
370
371         if(om == NULL) Rf_error("Must provide an output matrix");
372
373         int numcols = om->cols;
374         if(od->dataMat != NULL) { // Matrix Object
375                 omxMatrix* dataMat = od->dataMat;
376                 for(int j = 0; j < numcols; j++) {
377                         omxSetMatrixElement(om, 0, j, omxMatrixElement(dataMat, row, 
378                                                                        omxVectorElement(colList, j)));
379                 }
380         } else {                // Data Frame object
381                 double dataElement;
382                 int* locations = od->location;
383                 int** intDataColumns = od->intData;
384                 double **realDataColumns = od->realData;
385                 for(int j = 0; j < numcols; j++) {
386                         int location = locations[(int)omxVectorElement(colList, j)];
387                         if(location < 0) {
388                                 dataElement = (double) intDataColumns[~location][row];
389                         } else {
390                                 dataElement = realDataColumns[location][row];
391                         }
392                         omxSetMatrixElement(om, 0, j, dataElement);
393                 }
394         }
395 }
396
397 int omxDataIndex(omxData *od, int row) {
398         if(od->indexVector != NULL)
399                 return od->indexVector[row];
400         else return row;
401 }
402
403 int omxDataNumIdenticalRows(omxData *od, int row) {
404         if(od->identicalRows != NULL)
405                 return od->identicalRows[row];
406         else return 1;
407 }
408 int omxDataNumIdenticalMissingness(omxData *od, int row) {
409         if(od->identicalMissingness != NULL)
410                 return od->identicalMissingness[row];
411         else return 1;
412 }
413
414 int omxDataNumIdenticalDefs(omxData *od, int row){
415         if(od->identicalDefs != NULL)
416                 return od->identicalDefs[row];
417         else return 1;
418 }
419
420 int omxDataNumIdenticalContinuousRows(omxData *od, int row) {
421         if(od->numNumeric <= 0) {
422                 return od->rows;
423         }
424         return omxDataNumIdenticalRows(od, row);
425 }
426
427 int omxDataNumIdenticalContinuousMissingness(omxData *od, int row) {
428         if(od->numNumeric <= 0) {
429                 return od->rows;
430         }
431         return omxDataNumIdenticalMissingness(od, row);
432 }
433
434 int omxDataNumIdenticalOrdinalRows(omxData *od, int row) {
435         if(od->numFactor <= 0) {
436                 return od->rows;
437         }
438         return omxDataNumIdenticalRows(od, row);
439 }
440
441 int omxDataNumIdenticalOrdinalMissingness(omxData *od, int row) {
442         if(od->numFactor <= 0) {
443                 return od->rows;
444         }
445         return omxDataNumIdenticalMissingness(od, row);
446 }
447
448
449 double omxDataNumObs(omxData *od)
450 {
451         if (od->expectation) {
452                 omxMatrix *mat = omxGetExpectationComponent(od->expectation, NULL, "numObs");
453                 return omxMatrixElement(mat, 0, 0);
454         }
455         return od->numObs;
456 }
457
458 int omxDataNumFactor(omxData *od) {
459         return od->numFactor;
460 }
461
462 int omxDataNumNumeric(omxData *od) {
463         return od->numNumeric;
464 }
465
466 const char *omxDataType(omxData *od) {
467         return od->_type;
468 }
469
470 int elementEqualsDataframe(SEXP column, int offset1, int offset2) {
471         switch (TYPEOF(column)) {
472         case REALSXP:
473                 if(ISNA(REAL(column)[offset1])) return ISNA(REAL(column)[offset2]);
474                 if(ISNA(REAL(column)[offset2])) return ISNA(REAL(column)[offset1]);
475                 return(REAL(column)[offset1] == REAL(column)[offset2]);
476         case LGLSXP:
477         case INTSXP:
478                 return(INTEGER(column)[offset1] == INTEGER(column)[offset2]);           
479         }
480         return(0);
481 }
482
483 int testRowDataframe(SEXP data, int numrow, int numcol, int i, int *row, int base) {
484         SEXP column;
485         int j, equal = TRUE;
486
487         if (i == numrow) {
488                 equal = FALSE;
489         } else {
490                 for(j = 0; j < numcol && equal; j++) {
491                         column = VECTOR_ELT(data, j);
492                         equal = elementEqualsDataframe(column, base, i);
493                 }
494         }
495
496         if (!equal) {
497                 int gap = i - base;
498                 for(j = 0; j < gap; j++) {
499                         row[base + j] = gap - j;
500                 }
501                 base = i;
502         }
503         return(base);
504 }
505
506 int elementEqualsMatrix(SEXP data, int row1, int row2, int numrow, int col) {
507         int coloffset = col * numrow;
508         switch (TYPEOF(data)) {
509         case REALSXP:
510                 if(ISNA(REAL(data)[row1 + coloffset])) return ISNA(REAL(data)[row2 + coloffset]);
511                 if(ISNA(REAL(data)[row2 + coloffset])) return ISNA(REAL(data)[row1 + coloffset]);
512                 return(REAL(data)[row1 + coloffset] == REAL(data)[row2 + coloffset]);
513         case LGLSXP:
514         case INTSXP:
515                 return(INTEGER(data)[row1 + coloffset] == INTEGER(data)[row2 + coloffset]);
516         }
517         return(0);
518 }
519
520 int testRowMatrix(SEXP data, int numrow, int numcol, int i, int *row, int base) {
521         int j, equal = TRUE;
522
523         if (i == numrow) {
524                 equal = FALSE;
525         } else {
526                 for(j = 0; j < numcol && equal; j++) {
527                         equal = elementEqualsMatrix(data, i, base, numrow, j);
528                 }
529         }
530
531         if (!equal) {
532                 int gap = i - base;
533                 for(j = 0; j < gap; j++) {
534                         row[base + j] = gap - j;
535                 }
536                 base = i;
537         }
538         return(base);
539 }
540
541 SEXP findIdenticalMatrix(SEXP data, SEXP missing, SEXP defvars,
542                          SEXP skipMissingExp, SEXP skipDefvarsExp) {
543
544         SEXP retval, identicalRows, identicalMissing, identicalDefvars;
545         int i, numrow, numcol, defvarcol;
546         int *irows, *imissing, *idefvars;
547         int baserows, basemissing, basedefvars;
548         int skipMissing, skipDefvars;
549
550         skipMissing = LOGICAL(skipMissingExp)[0];
551         skipDefvars = LOGICAL(skipDefvarsExp)[0];
552         numrow = Rf_nrows(data);
553         numcol = Rf_ncols(data);
554         defvarcol = Rf_ncols(defvars);
555         Rf_protect(retval = Rf_allocVector(VECSXP, 3));
556         Rf_protect(identicalRows = Rf_allocVector(INTSXP, numrow));
557         Rf_protect(identicalMissing = Rf_allocVector(INTSXP, numrow));
558         Rf_protect(identicalDefvars = Rf_allocVector(INTSXP, numrow));
559         irows = INTEGER(identicalRows);
560         imissing = INTEGER(identicalMissing);
561         idefvars = INTEGER(identicalDefvars);
562         if (skipMissing) {
563                 for(i = 0; i < numrow; i++) {
564                         imissing[i] = numrow - i;
565                 }
566         }
567         if (skipDefvars) {
568                 for(i = 0; i < numrow; i++) {
569                         idefvars[i] = numrow - i;
570                 }
571         }
572         baserows = 0;
573         basemissing = 0;
574         basedefvars = 0;
575         for(i = 1; i <= numrow; i++) {
576                 baserows = testRowMatrix(data, numrow, numcol, i, irows, baserows); 
577                 if (!skipMissing) {
578                         basemissing = testRowMatrix(missing, numrow, numcol, i, imissing, basemissing); 
579                 }
580                 if (!skipDefvars) {
581                         basedefvars = testRowMatrix(defvars, numrow, defvarcol, i, idefvars, basedefvars);
582                 }
583         }
584         SET_VECTOR_ELT(retval, 0, identicalRows);
585         SET_VECTOR_ELT(retval, 1, identicalMissing);
586         SET_VECTOR_ELT(retval, 2, identicalDefvars);
587         Rf_unprotect(4); // retval, identicalRows, identicalMissing, identicalDefvars
588         return retval;
589 }
590
591 SEXP findIdenticalDataFrame(SEXP data, SEXP missing, SEXP defvars,
592                             SEXP skipMissingExp, SEXP skipDefvarsExp) {
593
594         SEXP retval, identicalRows, identicalMissing, identicalDefvars;
595         int i, numrow, numcol, defvarcol;
596         int *irows, *imissing, *idefvars;
597         int baserows, basemissing, basedefvars;
598         int skipMissing, skipDefvars;
599
600         skipMissing = LOGICAL(skipMissingExp)[0];
601         skipDefvars = LOGICAL(skipDefvarsExp)[0];
602         numrow = Rf_length(VECTOR_ELT(data, 0));
603         numcol = Rf_length(data);
604         defvarcol = Rf_length(defvars);
605         Rf_protect(retval = Rf_allocVector(VECSXP, 3));
606         Rf_protect(identicalRows = Rf_allocVector(INTSXP, numrow));
607         Rf_protect(identicalMissing = Rf_allocVector(INTSXP, numrow));
608         Rf_protect(identicalDefvars = Rf_allocVector(INTSXP, numrow));
609         irows = INTEGER(identicalRows);
610         imissing = INTEGER(identicalMissing);
611         idefvars = INTEGER(identicalDefvars);
612         if (skipMissing) {
613                 for(i = 0; i < numrow; i++) {
614                         imissing[i] = numrow - i;
615                 }
616         }
617         if (skipDefvars) {
618                 for(i = 0; i < numrow; i++) {
619                         idefvars[i] = numrow - i;
620                 }
621         }
622         baserows = 0;
623         basemissing = 0;
624         basedefvars = 0;
625         for(i = 1; i <= numrow; i++) {
626                 baserows = testRowDataframe(data, numrow, numcol, i, irows, baserows); 
627                 if (!skipMissing) {
628                         basemissing = testRowMatrix(missing, numrow, numcol, i, imissing, basemissing);
629                 }
630                 if (!skipDefvars) {
631                         basedefvars = testRowDataframe(defvars, numrow, defvarcol, i, idefvars, basedefvars);
632                 }
633         }
634         SET_VECTOR_ELT(retval, 0, identicalRows);
635         SET_VECTOR_ELT(retval, 1, identicalMissing);
636         SET_VECTOR_ELT(retval, 2, identicalDefvars);
637         Rf_unprotect(4); // retval, identicalRows, identicalMissing, identicalDefvars
638         return retval;
639 }
640
641 SEXP findIdenticalRowsData2(SEXP data, SEXP missing, SEXP defvars,
642                            SEXP skipMissingness, SEXP skipDefvars) {
643         if (Rf_isMatrix(data)) {
644                 return(findIdenticalMatrix(data, missing, defvars, skipMissingness, skipDefvars));
645         } else {
646                 return(findIdenticalDataFrame(data, missing, defvars, skipMissingness, skipDefvars));
647         }
648 }
649
650 SEXP findIdenticalRowsData(SEXP data, SEXP missing, SEXP defvars,
651                            SEXP skipMissingness, SEXP skipDefvars)
652 {
653         omxManageProtectInsanity protectManager;
654
655         try {
656                 return findIdenticalRowsData2(data, missing, defvars,
657                                               skipMissingness, skipDefvars);
658         } catch( std::exception& __ex__ ) {
659                 exception_to_try_Rf_error( __ex__ );
660         } catch(...) {
661                 string_to_try_Rf_error( "c++ exception (unknown reason)" );
662         }
663 }
664
665
666 void omxPrintData(omxData *od, const char *header, int maxRows)
667 {
668         if (!header) header = "Default data";
669
670         if (!od) {
671                 mxLog("%s: NULL", header);
672                 return;
673         }
674
675         std::string buf;
676         buf += string_snprintf("%s(%s): %f observations %d x %d\n", header, od->_type, od->numObs,
677                                od->rows, od->cols);
678         buf += string_snprintf("Row consists of %d numeric, %d ordered factor:", od->numNumeric, od->numFactor);
679
680         int upto = od->numObs;
681         if (maxRows >= 0 && maxRows < upto) upto = maxRows;
682
683         if (od->location) {
684                 for(int j = 0; j < od->cols; j++) {
685                         int loc = od->location[j];
686                         if (loc < 0) {
687                                 buf += " I";
688                         } else {
689                                 buf += " N";
690                         }
691                 }
692                 buf += "\n";
693
694                 for (int vx=0; vx < upto; vx++) {
695                         for (int j = 0; j < od->cols; j++) {
696                                 int loc = od->location[j];
697                                 if (loc < 0) {
698                                         int *val = od->intData[~loc];
699                                         if (val[vx] == NA_INTEGER) {
700                                                 buf += " NA,";
701                                         } else {
702                                                 buf += string_snprintf(" %d,", val[vx]);
703                                         }
704                                 } else {
705                                         double *val = od->realData[loc];
706                                         if (val[vx] == NA_REAL) {
707                                                 buf += " NA,";
708                                         } else {
709                                                 buf += string_snprintf(" %.3g,", val[vx]);
710                                         }
711                                 }
712                         }
713                         buf += "\n";
714                 }
715         }
716
717         if (od->identicalRows) {
718                 buf += "DUPS\trow\tmissing\tdefvars\n";
719                 for(int j = 0; j < upto; j++) {
720                         buf += string_snprintf("%d\t%d\t%d\t%d\n", j, od->identicalRows[j],
721                                                od->identicalMissingness[j], od->identicalDefs[j]);
722                 }
723         }
724         mxLogBig(buf);
725
726         if (od->dataMat) omxPrintMatrix(od->dataMat, "dataMat");
727         if (od->meansMat) omxPrintMatrix(od->meansMat, "meansMat");
728 }
729
730 void omxPrintData(omxData *od, const char *header)
731 {
732         omxPrintData(od, header, -1);
733 }
734