Enable R_NO_REMAP for a cleaner namespace
[openmx:openmx.git] / src / omxData.cpp
1 /*
2  *  Copyright 2007-2014 The OpenMx Project
3  *
4  *  Licensed under the Apache License, Version 2.0 (the "License");
5  *  you may not use this file except in compliance with the License.
6  *  You may obtain a copy of the License at
7  *
8  *       http://www.apache.org/licenses/LICENSE-2.0
9  *
10  *  Unless required by applicable law or agreed to in writing, software
11  *  distributed under the License is distributed on an "AS IS" BASIS,
12  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  *  See the License for the specific language governing permissions and
14  *  limitations under the License.
15  */
16
17 /***********************************************************
18  *
19  *  omxData.cc
20  *
21  *  Created: Timothy R. Brick   Date: 2009-07-15
22  *
23  *      Contains code for the omxData class
24  *   omxData objects hold data in whatever form it takes
25  *
26  **********************************************************/
27 #include "omxData.h"
28 #include "glue.h"
29 #include "omxState.h"
30
31 omxData* omxInitData(omxState* os) {
32
33         omxData *od = Calloc(1, omxData);
34
35         od->currentState = os;
36
37         if (os != globalState) Rf_error("Too late to create omxData");
38         os->dataList.push_back(od);
39
40         return od;
41
42 }
43
44 omxData* omxDataLookupFromState(SEXP dataObject, omxState* state) {
45         int dataIdx = INTEGER(dataObject)[0];
46
47         return state->dataList[dataIdx];
48 }
49
50 omxData* omxNewDataFromMxData(SEXP dataObject, omxState* state) {
51         if(OMX_DEBUG) {mxLog("Initializing data Element.");}
52         if(dataObject == NULL) {
53                 Rf_error("Null Data Object detected.  This is an internal Rf_error, and should be reported on the forums.\n");
54         }
55
56         omxData* od = omxInitData(state);
57
58         SEXP dataLoc, dataVal;
59         int numCols;
60
61         // PARSE MxData Structure
62         if(OMX_DEBUG) {mxLog("Processing Data Type.");}
63         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("type")));
64         if(dataLoc == NULL) { Rf_error("Data has no type.  Sorry.\nThis is an internal Rf_error, and should be reported on the forums.\n");}
65         Rf_protect(dataVal = STRING_ELT(dataLoc,0));
66         od->_type = CHAR(dataVal);
67         if(OMX_DEBUG) {mxLog("Element is type %s.", od->_type);}
68
69         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("observed")));
70         if(OMX_DEBUG) {mxLog("Processing Data Elements.");}
71         if(Rf_isFrame(dataLoc)) {
72                 if(OMX_DEBUG) {mxLog("Data is a frame.");}
73                 // Process Data Frame into Columns
74                 od->cols = Rf_length(dataLoc);
75                 if(OMX_DEBUG) {mxLog("Data has %d columns.", od->cols);}
76                 numCols = od->cols;
77                 od->realData = (double**) R_alloc(numCols, sizeof(double*));
78                 OMXZERO(od->realData, numCols);
79                 od->intData = (int**) R_alloc(numCols, sizeof(int*));
80                 OMXZERO(od->intData, numCols);
81                 od->location = (int*) R_alloc(numCols, sizeof(int));
82                 SEXP colnames;
83                 Rf_protect(colnames = Rf_getAttrib(dataLoc, R_NamesSymbol));
84                 for(int j = 0; j < numCols; j++) {
85                         SEXP rcol;
86                         Rf_protect(rcol = VECTOR_ELT(dataLoc, j));
87                         if(Rf_isFactor(rcol)) {
88                                 if (Rf_isUnordered(rcol)) {
89                                         Rf_warning("Data[%d] '%s' must be an ordered factor. Please use mxFactor()",
90                                                 j+1, CHAR(STRING_ELT(colnames, j)));
91                                 }
92                                 if(OMX_DEBUG) {mxLog("Column %d is a factor.", j);}
93                                 od->intData[j] = INTEGER(rcol);
94                                 od->location[j] = ~j;
95                                 od->numFactor++;
96                         } else if (Rf_isInteger(rcol)) {
97                                 Rf_error("Internal Rf_error: Column %d is in integer format.", j);
98                         } else {
99                                 if(OMX_DEBUG) {mxLog("Column %d is a numeric.", j);}
100                                 od->realData[j] = REAL(rcol);
101                                 od->location[j] = j;
102                                 od->numNumeric++;
103                         }
104                 }
105                 od->rows = Rf_length(VECTOR_ELT(dataLoc, 0));
106                 if(OMX_DEBUG) {mxLog("And %d rows.", od->rows);}
107         } else {
108                 if(OMX_DEBUG) {mxLog("Data contains a matrix.");}
109                 od->dataMat = omxNewMatrixFromRPrimitive(dataLoc, od->currentState, 0, 0);
110                 
111                 if (od->dataMat->colMajor && strncmp(od->_type, "raw", 3) == 0) { 
112                         omxToggleRowColumnMajor(od->dataMat);
113                 }
114                 od->cols = od->dataMat->cols;
115                 od->rows = od->dataMat->rows;
116                 od->numNumeric = od->cols;
117         }
118
119         if(OMX_DEBUG) {mxLog("Processing Means Matrix.");}
120         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("means")));
121         od->meansMat = omxNewMatrixFromRPrimitive(dataLoc, od->currentState, 0, 0);
122         if(od->meansMat->rows == 1 && od->meansMat->cols == 1 && 
123            (!R_finite(omxMatrixElement(od->meansMat, 0, 0)) ||
124             !isfinite(omxMatrixElement(od->meansMat, 0, 0)))) {
125                 omxFreeMatrixData(od->meansMat); // Clear just-allocated memory.
126                 od->meansMat = NULL;  // 1-by-1 matrix of NAs is a null means matrix.
127                 // FIXME: The above check may cause problems for dynamic data if the means
128                 //          originally is a 1x1 that has not yet been calculated.  This should be
129                 //          adjusted.
130         }
131         
132         if(OMX_DEBUG) {
133                 if(od->meansMat == NULL) {mxLog("No means found.");}
134                 else {omxPrint(od->meansMat, "Means Matrix is:");}
135         }
136
137         if(OMX_DEBUG) {mxLog("Processing Asymptotic Covariance Matrix.");}
138         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("acov")));
139         od->acovMat = omxNewMatrixFromRPrimitive(dataLoc, od->currentState, 0, 0);
140         if(od->acovMat->rows == 1 && od->acovMat->cols == 1 && 
141            (!R_finite(omxMatrixElement(od->acovMat, 0, 0)) ||
142             !isfinite(omxMatrixElement(od->acovMat, 0, 0)))) {
143                 omxFreeMatrixData(od->acovMat); // Clear just-allocated memory.
144                 od->acovMat = NULL;
145         }
146
147         if(OMX_DEBUG) {mxLog("Processing Observed Thresholds Matrix.");}
148         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("thresholds")));
149         od->obsThresholdsMat = omxNewMatrixFromRPrimitive(dataLoc, od->currentState, 0, 0);
150         if(od->obsThresholdsMat->rows == 1 && od->obsThresholdsMat->cols == 1 && 
151            (!R_finite(omxMatrixElement(od->obsThresholdsMat, 0, 0)) ||
152             !isfinite(omxMatrixElement(od->obsThresholdsMat, 0, 0)))) {
153                 omxFreeMatrixData(od->obsThresholdsMat); // Clear just-allocated memory.
154                 od->obsThresholdsMat = NULL;
155         } else {
156         int nCol = od->obsThresholdsMat->cols;
157                 /* Match threshold column names and build ThresholdCols structure */
158                 od->thresholdCols = (omxThresholdColumn*) R_alloc(nCol, sizeof(omxThresholdColumn));
159         Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("thresholdColumns")));
160         int *columns = INTEGER(dataLoc);
161         Rf_protect(dataVal = R_do_slot(dataObject, Rf_install("thresholdLevels")));
162         int *levels = INTEGER(dataVal);
163         for(int i = 0; i < od->obsThresholdsMat->cols; i++) {
164             od->thresholdCols[i].matrix = od->obsThresholdsMat;
165             od->thresholdCols[i].column = columns[i];
166             od->thresholdCols[i].numThresholds = levels[i];
167                         if(OMX_DEBUG) { mxLog("omxData observed thresholds SOURCE column %d.", i); }
168                         if(OMX_DEBUG) {omxPrintMatrix(od->thresholdCols[i].matrix, "...Thresholds matrix"); }
169                         if(OMX_DEBUG) { mxLog("omxData observed thresholds DEST column %d.", od->thresholdCols[i].column); }
170                         if(OMX_DEBUG) { mxLog("omxData observed thresholds numThresholds %d.", od->thresholdCols[i].numThresholds); }
171         }
172         }
173
174         if(strncmp(od->_type, "raw", 3) != 0) {
175                 if(OMX_DEBUG) {mxLog("Processing Observation Count.");}
176                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("numObs")));
177                 od->numObs = REAL(dataLoc)[0];
178         } else {
179                 od->numObs = od->rows;
180                 if(OMX_DEBUG) {mxLog("Processing presort metadata.");}
181                 /* For raw data, process sorting metadata. */
182                 // Process unsorted indices:  // TODO: Generate reverse lookup table
183                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("indexVector")));
184                 od->indexVector = INTEGER(dataLoc);
185                 if(Rf_length(dataLoc) == 0 || od->indexVector[0] == R_NaInt) od->indexVector = NULL;
186                 // Process pre-computed identicality checks
187                 if(OMX_DEBUG) {mxLog("Processing definition variable identicality.");}
188                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalDefVars")));
189                 od->identicalDefs = INTEGER(dataLoc);
190                 if(Rf_length(dataLoc) == 0 || od->identicalDefs[0] == R_NaInt) od->identicalDefs = NULL;
191                 if(OMX_DEBUG) {mxLog("Processing missingness identicality.");}
192                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalMissingness")));
193                 od->identicalMissingness = INTEGER(dataLoc);
194                 if(Rf_length(dataLoc) == 0 || od->identicalMissingness[0] == R_NaInt) od->identicalMissingness = NULL;
195                 if(OMX_DEBUG) {mxLog("Processing row identicality.");}
196                 Rf_protect(dataLoc = R_do_slot(dataObject, Rf_install("identicalRows")));
197                 od->identicalRows = INTEGER(dataLoc);
198                 if(Rf_length(dataLoc) == 0 || od->identicalRows[0] == R_NaInt) od->identicalRows = NULL;
199         }
200         return od;
201 }
202
203 void resetDefinitionVariables(double *oldDefs, int numDefs) {
204         int nextDef;
205
206         for(nextDef = 0; nextDef < numDefs; nextDef++) {
207                 oldDefs[nextDef] = NA_REAL;                                     // Def Vars default to NA
208         }
209
210 }
211
212 void omxFreeData(omxData* od) {
213         omxFreeAllMatrixData(od->dataMat);
214         omxFreeAllMatrixData(od->meansMat);
215         omxFreeAllMatrixData(od->acovMat);
216         omxFreeAllMatrixData(od->obsThresholdsMat);
217         Free(od);
218 }
219
220 double omxDoubleDataElement(omxData *od, int row, int col) {
221         if(od->dataMat != NULL) {
222                 return omxMatrixElement(od->dataMat, row, col);
223         }
224         int location = od->location[col];
225         if(location < 0) {
226                 return (double)(od->intData[~location][row]);
227         } else {
228                 return od->realData[location][row];
229         }
230 }
231
232 int omxIntDataElement(omxData *od, int row, int col) {
233         if(od->dataMat != NULL) {
234                 Rf_error("Use a data frame for factor data");
235         }
236
237         int location = od->location[col];
238         if(location < 0) {
239                 return (od->intData[~location][row]);
240         } else {
241                 return (int)(od->realData[location][row]);
242         }
243 }
244
245 omxMatrix* omxDataMatrix(omxData *od, omxMatrix* om) {
246         double dataElement;
247
248         if(od->dataMat != NULL) {               // Data was entered as a matrix.
249                 if(om != NULL) {                        // It stays as such
250                         omxCopyMatrix(om, od->dataMat);
251                         return om;
252                 }
253                 return od->dataMat;
254         }
255         // Otherwise, we must construct the matrix.
256         int numRows = od->rows, numCols = od->cols;
257
258         if(om == NULL) {
259                 om = omxInitMatrix(om, numRows, numCols, TRUE, od->currentState);
260         }
261
262         if(om->rows != numRows || om->cols != numCols) {
263                 omxResizeMatrix(om, numRows, numCols, FALSE);
264         }
265
266         for(int j = 0; j < numCols; j++) {
267                 for(int k = 0; k < numRows; k++) {
268                         int location = od->location[j];
269                         if(location < 0) {
270                                 dataElement = (double) od->intData[~location][k];
271                         } else {
272                                 dataElement = od->realData[location][k];
273                         }
274                         omxSetMatrixElement(om, k, j, dataElement);
275                 }
276         }
277         return om;
278 }
279
280 omxMatrix* omxDataAcov(omxData *od, omxMatrix* om) {
281         if(od->acovMat != NULL) {               // acov was entered as a matrix.
282                 if(om != NULL) {                        // It stays as such
283                         omxAliasMatrix(om, od->acovMat);
284                         return om;
285                 }
286                 return od->acovMat;
287         }
288         // Otherwise, we must construct the matrix.
289         int numRows = ( (od->rows)*(od->rows + 1) ) / 2;
290         
291         if(om == NULL) {
292                 om = omxInitMatrix(om, numRows, numRows, TRUE, od->currentState);
293         }
294         omxCopyMatrix(om, od->acovMat);//omxAliasMatrix(om, od->acovMat); // Could also be done with omxCopyMatrix.
295         return om;
296 }
297
298 unsigned short int omxDataColumnIsFactor(omxData *od, int col) {
299         if(od->dataMat != NULL) return FALSE;
300         if(col <= od->cols) return (od->location[col] < 0);
301
302         Rf_error("Attempted to access column %d of a %d-column data object", col, od->cols);
303         return 0; // not reached
304 }
305
306 omxMatrix* omxDataMeans(omxData *od, omxMatrix* colList, omxMatrix* om) {
307
308         if(od->meansMat == NULL) return NULL;
309
310         if(colList == NULL) {
311                 if(om == NULL) return od->meansMat;
312                 omxCopyMatrix(om, od->meansMat);
313                 return om;
314         }
315
316         int cols = colList->cols;
317
318         if(colList == NULL || cols == 0 || cols > od->cols) {
319                 cols = od->cols;
320                 if(om == NULL) return od->meansMat;
321                 omxCopyMatrix(om, od->meansMat);
322                 return om;
323         }
324
325         if(om == NULL) {
326                 om = omxInitMatrix(om, 1, cols, TRUE, od->currentState);
327         }
328
329         for(int i = 0; i < cols; i++) {
330                 omxSetMatrixElement(om, 1, i, omxVectorElement(od->meansMat, omxVectorElement(colList, i)));
331         }
332
333         return om;
334 }
335
336 omxThresholdColumn* omxDataThresholds(omxData *od) {
337     return od->thresholdCols;
338 }
339
340 void omxSetContiguousDataColumns(omxContiguousData* contiguous, omxData* data, omxMatrix* colList) {
341
342         contiguous->isContiguous = FALSE;   // Assume not contiguous
343
344         if (data->dataMat == NULL) return; // Data has no matrix elements, so skip.
345
346         omxMatrix* dataMat = data->dataMat;
347         if (dataMat->colMajor) return;      // If data matrix is column-major, there's no continuity
348         
349         int colListLength = colList->cols;              // # of columns in the cov matrix
350         double start = omxVectorElement(colList, 0);    // Data col of first column of the covariance
351         contiguous->start = (int) start;                // That's our starting point.
352         contiguous->length = colListLength;             // And the length is ncol(cov)
353         
354         for(int i = 1; i < colListLength; i++) {        // Make sure that the col list is 
355                 double next = omxVectorElement(colList, i); // contiguously increasing in column number
356                 if (next != (start + i)) return;            // If it isn't, it's not contiguous data
357         }
358         
359         contiguous->isContiguous = TRUE;    // Passed.  This is contiguous.
360 }
361
362 void omxContiguousDataRow(omxData *od, int row, int start, int Rf_length, omxMatrix* om) {
363         // TODO: Might be better to combine this with omxDataRow to make a single accessor omxDataRow with a second signature that accepts an omxContiguousData argument.
364         if(row >= od->rows) Rf_error("Invalid row");
365
366         if(om == NULL) Rf_error("Must provide an output matrix");
367         
368         int numcols = od->cols;
369         omxMatrix* dataMat = od->dataMat;
370         double *dest = om->data;
371         double *source = dataMat->data + row * numcols + start;
372         memcpy(dest, source, sizeof(double) * Rf_length);
373 }
374
375 void omxDataRow(omxData *od, int row, omxMatrix* colList, omxMatrix* om) {
376
377         if(colList == NULL || row >= od->rows) Rf_error("Invalid row or colList");
378
379         if(om == NULL) Rf_error("Must provide an output matrix");
380
381         int numcols = om->cols;
382         if(od->dataMat != NULL) { // Matrix Object
383                 omxMatrix* dataMat = od->dataMat;
384                 for(int j = 0; j < numcols; j++) {
385                         omxSetMatrixElement(om, 0, j, omxMatrixElement(dataMat, row, 
386                                                                        omxVectorElement(colList, j)));
387                 }
388         } else {                // Data Frame object
389                 double dataElement;
390                 int* locations = od->location;
391                 int** intDataColumns = od->intData;
392                 double **realDataColumns = od->realData;
393                 for(int j = 0; j < numcols; j++) {
394                         int location = locations[(int)omxVectorElement(colList, j)];
395                         if(location < 0) {
396                                 dataElement = (double) intDataColumns[~location][row];
397                         } else {
398                                 dataElement = realDataColumns[location][row];
399                         }
400                         omxSetMatrixElement(om, 0, j, dataElement);
401                 }
402         }
403 }
404
405 int omxDataIndex(omxData *od, int row) {
406         if(od->indexVector != NULL)
407                 return od->indexVector[row];
408         else return row;
409 }
410
411 int omxDataNumIdenticalRows(omxData *od, int row) {
412         if(od->identicalRows != NULL)
413                 return od->identicalRows[row];
414         else return 1;
415 }
416 int omxDataNumIdenticalMissingness(omxData *od, int row) {
417         if(od->identicalMissingness != NULL)
418                 return od->identicalMissingness[row];
419         else return 1;
420 }
421
422 int omxDataNumIdenticalDefs(omxData *od, int row){
423         if(od->identicalDefs != NULL)
424                 return od->identicalDefs[row];
425         else return 1;
426 }
427
428 int omxDataNumIdenticalContinuousRows(omxData *od, int row) {
429         if(od->numNumeric <= 0) {
430                 return od->rows;
431         }
432         return omxDataNumIdenticalRows(od, row);
433 }
434
435 int omxDataNumIdenticalContinuousMissingness(omxData *od, int row) {
436         if(od->numNumeric <= 0) {
437                 return od->rows;
438         }
439         return omxDataNumIdenticalMissingness(od, row);
440 }
441
442 int omxDataNumIdenticalOrdinalRows(omxData *od, int row) {
443         if(od->numFactor <= 0) {
444                 return od->rows;
445         }
446         return omxDataNumIdenticalRows(od, row);
447 }
448
449 int omxDataNumIdenticalOrdinalMissingness(omxData *od, int row) {
450         if(od->numFactor <= 0) {
451                 return od->rows;
452         }
453         return omxDataNumIdenticalMissingness(od, row);
454 }
455
456
457 double omxDataNumObs(omxData *od) {
458         return od->numObs;
459 }
460
461 int omxDataNumFactor(omxData *od) {
462         return od->numFactor;
463 }
464
465 int omxDataNumNumeric(omxData *od) {
466         return od->numNumeric;
467 }
468
469 const char *omxDataType(omxData *od) {
470         return od->_type;
471 }
472
473 int elementEqualsDataframe(SEXP column, int offset1, int offset2) {
474         switch (TYPEOF(column)) {
475         case REALSXP:
476                 if(ISNA(REAL(column)[offset1])) return ISNA(REAL(column)[offset2]);
477                 if(ISNA(REAL(column)[offset2])) return ISNA(REAL(column)[offset1]);
478                 return(REAL(column)[offset1] == REAL(column)[offset2]);
479         case LGLSXP:
480         case INTSXP:
481                 return(INTEGER(column)[offset1] == INTEGER(column)[offset2]);           
482         }
483         return(0);
484 }
485
486 int testRowDataframe(SEXP data, int numrow, int numcol, int i, int *row, int base) {
487         SEXP column;
488         int j, equal = TRUE;
489
490         if (i == numrow) {
491                 equal = FALSE;
492         } else {
493                 for(j = 0; j < numcol && equal; j++) {
494                         column = VECTOR_ELT(data, j);
495                         equal = elementEqualsDataframe(column, base, i);
496                 }
497         }
498
499         if (!equal) {
500                 int gap = i - base;
501                 for(j = 0; j < gap; j++) {
502                         row[base + j] = gap - j;
503                 }
504                 base = i;
505         }
506         return(base);
507 }
508
509 int elementEqualsMatrix(SEXP data, int row1, int row2, int numrow, int col) {
510         int coloffset = col * numrow;
511         switch (TYPEOF(data)) {
512         case REALSXP:
513                 if(ISNA(REAL(data)[row1 + coloffset])) return ISNA(REAL(data)[row2 + coloffset]);
514                 if(ISNA(REAL(data)[row2 + coloffset])) return ISNA(REAL(data)[row1 + coloffset]);
515                 return(REAL(data)[row1 + coloffset] == REAL(data)[row2 + coloffset]);
516         case LGLSXP:
517         case INTSXP:
518                 return(INTEGER(data)[row1 + coloffset] == INTEGER(data)[row2 + coloffset]);
519         }
520         return(0);
521 }
522
523 int testRowMatrix(SEXP data, int numrow, int numcol, int i, int *row, int base) {
524         int j, equal = TRUE;
525
526         if (i == numrow) {
527                 equal = FALSE;
528         } else {
529                 for(j = 0; j < numcol && equal; j++) {
530                         equal = elementEqualsMatrix(data, i, base, numrow, j);
531                 }
532         }
533
534         if (!equal) {
535                 int gap = i - base;
536                 for(j = 0; j < gap; j++) {
537                         row[base + j] = gap - j;
538                 }
539                 base = i;
540         }
541         return(base);
542 }
543
544 SEXP findIdenticalMatrix(SEXP data, SEXP missing, SEXP defvars,
545                          SEXP skipMissingExp, SEXP skipDefvarsExp) {
546
547         SEXP retval, identicalRows, identicalMissing, identicalDefvars;
548         int i, numrow, numcol, defvarcol;
549         int *irows, *imissing, *idefvars;
550         int baserows, basemissing, basedefvars;
551         int skipMissing, skipDefvars;
552
553         skipMissing = LOGICAL(skipMissingExp)[0];
554         skipDefvars = LOGICAL(skipDefvarsExp)[0];
555         numrow = Rf_nrows(data);
556         numcol = Rf_ncols(data);
557         defvarcol = Rf_ncols(defvars);
558         Rf_protect(retval = Rf_allocVector(VECSXP, 3));
559         Rf_protect(identicalRows = Rf_allocVector(INTSXP, numrow));
560         Rf_protect(identicalMissing = Rf_allocVector(INTSXP, numrow));
561         Rf_protect(identicalDefvars = Rf_allocVector(INTSXP, numrow));
562         irows = INTEGER(identicalRows);
563         imissing = INTEGER(identicalMissing);
564         idefvars = INTEGER(identicalDefvars);
565         if (skipMissing) {
566                 for(i = 0; i < numrow; i++) {
567                         imissing[i] = numrow - i;
568                 }
569         }
570         if (skipDefvars) {
571                 for(i = 0; i < numrow; i++) {
572                         idefvars[i] = numrow - i;
573                 }
574         }
575         baserows = 0;
576         basemissing = 0;
577         basedefvars = 0;
578         for(i = 1; i <= numrow; i++) {
579                 baserows = testRowMatrix(data, numrow, numcol, i, irows, baserows); 
580                 if (!skipMissing) {
581                         basemissing = testRowMatrix(missing, numrow, numcol, i, imissing, basemissing); 
582                 }
583                 if (!skipDefvars) {
584                         basedefvars = testRowMatrix(defvars, numrow, defvarcol, i, idefvars, basedefvars);
585                 }
586         }
587         SET_VECTOR_ELT(retval, 0, identicalRows);
588         SET_VECTOR_ELT(retval, 1, identicalMissing);
589         SET_VECTOR_ELT(retval, 2, identicalDefvars);
590         Rf_unprotect(4); // retval, identicalRows, identicalMissing, identicalDefvars
591         return retval;
592 }
593
594 SEXP findIdenticalDataFrame(SEXP data, SEXP missing, SEXP defvars,
595                             SEXP skipMissingExp, SEXP skipDefvarsExp) {
596
597         SEXP retval, identicalRows, identicalMissing, identicalDefvars;
598         int i, numrow, numcol, defvarcol;
599         int *irows, *imissing, *idefvars;
600         int baserows, basemissing, basedefvars;
601         int skipMissing, skipDefvars;
602
603         skipMissing = LOGICAL(skipMissingExp)[0];
604         skipDefvars = LOGICAL(skipDefvarsExp)[0];
605         numrow = Rf_length(VECTOR_ELT(data, 0));
606         numcol = Rf_length(data);
607         defvarcol = Rf_length(defvars);
608         Rf_protect(retval = Rf_allocVector(VECSXP, 3));
609         Rf_protect(identicalRows = Rf_allocVector(INTSXP, numrow));
610         Rf_protect(identicalMissing = Rf_allocVector(INTSXP, numrow));
611         Rf_protect(identicalDefvars = Rf_allocVector(INTSXP, numrow));
612         irows = INTEGER(identicalRows);
613         imissing = INTEGER(identicalMissing);
614         idefvars = INTEGER(identicalDefvars);
615         if (skipMissing) {
616                 for(i = 0; i < numrow; i++) {
617                         imissing[i] = numrow - i;
618                 }
619         }
620         if (skipDefvars) {
621                 for(i = 0; i < numrow; i++) {
622                         idefvars[i] = numrow - i;
623                 }
624         }
625         baserows = 0;
626         basemissing = 0;
627         basedefvars = 0;
628         for(i = 1; i <= numrow; i++) {
629                 baserows = testRowDataframe(data, numrow, numcol, i, irows, baserows); 
630                 if (!skipMissing) {
631                         basemissing = testRowMatrix(missing, numrow, numcol, i, imissing, basemissing);
632                 }
633                 if (!skipDefvars) {
634                         basedefvars = testRowDataframe(defvars, numrow, defvarcol, i, idefvars, basedefvars);
635                 }
636         }
637         SET_VECTOR_ELT(retval, 0, identicalRows);
638         SET_VECTOR_ELT(retval, 1, identicalMissing);
639         SET_VECTOR_ELT(retval, 2, identicalDefvars);
640         Rf_unprotect(4); // retval, identicalRows, identicalMissing, identicalDefvars
641         return retval;
642 }
643
644 SEXP findIdenticalRowsData2(SEXP data, SEXP missing, SEXP defvars,
645                            SEXP skipMissingness, SEXP skipDefvars) {
646         if (Rf_isMatrix(data)) {
647                 return(findIdenticalMatrix(data, missing, defvars, skipMissingness, skipDefvars));
648         } else {
649                 return(findIdenticalDataFrame(data, missing, defvars, skipMissingness, skipDefvars));
650         }
651 }
652
653 SEXP findIdenticalRowsData(SEXP data, SEXP missing, SEXP defvars,
654                            SEXP skipMissingness, SEXP skipDefvars)
655 {
656         omxManageProtectInsanity protectManager;
657
658         try {
659                 return findIdenticalRowsData2(data, missing, defvars,
660                                               skipMissingness, skipDefvars);
661         } catch( std::exception& __ex__ ) {
662                 exception_to_try_Rf_error( __ex__ );
663         } catch(...) {
664                 string_to_try_Rf_error( "c++ exception (unknown reason)" );
665         }
666 }
667
668
669 void omxPrintData(omxData *od, const char *header, int maxRows)
670 {
671         if (!header) header = "Default data";
672
673         if (!od) {
674                 mxLog("%s: NULL", header);
675                 return;
676         }
677
678         std::string buf;
679         buf += string_snprintf("%s(%s): %f observations %d x %d\n", header, od->_type, od->numObs,
680                                od->rows, od->cols);
681         buf += string_snprintf("Row consists of %d numeric, %d ordered factor:", od->numNumeric, od->numFactor);
682
683         int upto = od->numObs;
684         if (maxRows >= 0 && maxRows < upto) upto = maxRows;
685
686         if (od->location) {
687                 for(int j = 0; j < od->cols; j++) {
688                         int loc = od->location[j];
689                         if (loc < 0) {
690                                 buf += " I";
691                         } else {
692                                 buf += " N";
693                         }
694                 }
695                 buf += "\n";
696
697                 for (int vx=0; vx < upto; vx++) {
698                         for (int j = 0; j < od->cols; j++) {
699                                 int loc = od->location[j];
700                                 if (loc < 0) {
701                                         int *val = od->intData[~loc];
702                                         if (val[vx] == NA_INTEGER) {
703                                                 buf += " NA,";
704                                         } else {
705                                                 buf += string_snprintf(" %d,", val[vx]);
706                                         }
707                                 } else {
708                                         double *val = od->realData[loc];
709                                         if (val[vx] == NA_REAL) {
710                                                 buf += " NA,";
711                                         } else {
712                                                 buf += string_snprintf(" %.3g,", val[vx]);
713                                         }
714                                 }
715                         }
716                         buf += "\n";
717                 }
718         }
719
720         if (od->identicalRows) {
721                 buf += "DUPS\trow\tmissing\tdefvars\n";
722                 for(int j = 0; j < upto; j++) {
723                         buf += string_snprintf("%d\t%d\t%d\t%d\n", j, od->identicalRows[j],
724                                                od->identicalMissingness[j], od->identicalDefs[j]);
725                 }
726         }
727         mxLogBig(buf);
728
729         if (od->dataMat) omxPrintMatrix(od->dataMat, "dataMat");
730         if (od->meansMat) omxPrintMatrix(od->meansMat, "meansMat");
731 }
732
733 void omxPrintData(omxData *od, const char *header)
734 {
735         omxPrintData(od, header, -1);
736 }
737