Skip to content

Commit 8191252

Browse files
committed
add DataFrame asMatrix() method to convert to a matrix
1 parent ed5ef14 commit 8191252

8 files changed

Lines changed: 112 additions & 1 deletion

File tree

EidosScribe/EidosHelpClasses.rtf

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,27 @@
193193
\f1\fs22 methods\
194194
\pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0
195195
196-
\f3\i0\fs18 \cf2 \'96\'a0(void)cbind(object\'a0source, ...)\
196+
\f3\i0\fs18 \cf2 \'96\'a0(*)asMatrix(void)\
197+
\pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0
198+
199+
\f4\fs20 \cf2 Returns a matrix representation of the
200+
\f3\fs18 DataFrame
201+
\f4\fs20 . The matrix will have the same type as the elements of the
202+
\f3\fs18 DataFrame
203+
\f4\fs20 ; if the
204+
\f3\fs18 DataFrame
205+
\f4\fs20 contains more than one type of element, an error will be raised. The order of the columns of the
206+
\f3\fs18 DataFrame
207+
\f4\fs20 will be preserved. This method is useful, for example, if you wish to read in a text file as a matrix; you can use
208+
\f3\fs18 readCSV()
209+
\f4\fs20 to read the file as a
210+
\f3\fs18 DataFrame
211+
\f4\fs20 , and then convert it to a matrix with
212+
\f3\fs18 asMatrix()
213+
\f4\fs20 .\
214+
\pard\pardeftab397\li720\fi-446\ri720\sb180\sa60\partightenfactor0
215+
216+
\f3\fs18 \cf2 \'96\'a0(void)cbind(object\'a0source, ...)\
197217
\pard\pardeftab397\li547\ri720\sb60\sa60\partightenfactor0
198218

199219
\f4\fs20 \cf2 Adds all of the columns contained by

QtSLiM/help/EidosHelpClasses.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
<p class="p3">nrow =&gt; (integer$)</p>
4444
<p class="p4">The number of rows in the <span class="s1">DataFrame</span> (i.e., the number of elements in a column).<span class="Apple-converted-space">  </span>This will be the same for every column, by definition.</p>
4545
<p class="p2"><i>5.2.2<span class="Apple-converted-space">  </span></i><span class="s1"><i>DataFrame</i></span><i> methods</i></p>
46+
<p class="p3">– (*)asMatrix(void)</p>
47+
<p class="p4">Returns a matrix representation of the <span class="s1">DataFrame</span>.<span class="Apple-converted-space">  </span>The matrix will have the same type as the elements of the <span class="s1">DataFrame</span>; if the <span class="s1">DataFrame</span> contains more than one type of element, an error will be raised.<span class="Apple-converted-space">  </span>The order of the columns of the <span class="s1">DataFrame</span> will be preserved.<span class="Apple-converted-space">  </span>This method is useful, for example, if you wish to read in a text file as a matrix; you can use <span class="s1">readCSV()</span> to read the file as a <span class="s1">DataFrame</span>, and then convert it to a matrix with <span class="s1">asMatrix()</span>.</p>
4648
<p class="p3">– (void)cbind(object source, ...)</p>
4749
<p class="p4">Adds all of the columns contained by <span class="s1">source</span> (which must be a <span class="s1">Dictionary</span> or a subclass of <span class="s1">Dictionary</span> such as <span class="s1">DataFrame</span>) to the receiver.<span class="Apple-converted-space">  </span>This method makes the target <span class="s1">DataFrame</span> wider, by adding new columns.<span class="Apple-converted-space">  </span>If <span class="s1">source</span> contains a column name that is already defined in the target, an error will result.<span class="Apple-converted-space">  </span>As always for <span class="s1">DataFrame</span>, the columns of the resulting <span class="s1">DataFrame</span> must all be the same length.</p>
4850
<p class="p4">The <span class="s1">source</span> parameter may be a non-singleton vector containing multiple <span class="s1">Dictionary</span> objects, and additional <span class="s1">Dictionary</span> vectors may be supplied (thus the ellipsis in the signature).<span class="Apple-converted-space">  </span>Each <span class="s1">Dictionary</span> supplied will be added to the target, in the order supplied.</p>

VERSIONS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ development head (in the master branch):
1818
improve memory usage for Individual, down to 192 bytes from 232 bytes, by compactifying the color information for SLiMgui and rearranging ivars to minimize wasted space
1919
the color property on Individual no longer guarantees that the value read equals the value set; when a color is set, it is now converted to RGB, so named colors do not round-trip
2020
add a meanParentAge property, to make calculating generation length simpler; unavailable in WF models (like age), 0 for parentless individuals
21+
add DataFrame asMatrix() method to convert a DataFrame into a matrix, if all columns are the same type/class
2122

2223

2324
version 4.0 (Eidos version 3.0):

eidos/eidos_class_DataFrame.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ EidosValue_SP EidosDataFrame::ExecuteInstanceMethod(EidosGlobalStringID p_method
357357

358358
switch (p_method_id)
359359
{
360+
case gEidosID_asMatrix: return ExecuteMethod_asMatrix(p_method_id, p_arguments, p_interpreter);
360361
case gEidosID_cbind: return ExecuteMethod_cbind(p_method_id, p_arguments, p_interpreter);
361362
case gEidosID_rbind: return ExecuteMethod_rbind(p_method_id, p_arguments, p_interpreter);
362363
case gEidosID_subset: return ExecuteMethod_subset(p_method_id, p_arguments, p_interpreter);
@@ -366,6 +367,76 @@ EidosValue_SP EidosDataFrame::ExecuteInstanceMethod(EidosGlobalStringID p_method
366367
}
367368
}
368369

370+
// ********************* - (void)asMatrix()
371+
//
372+
EidosValue_SP EidosDataFrame::ExecuteMethod_asMatrix(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter)
373+
{
374+
#pragma unused (p_method_id, p_interpreter)
375+
// First determine what type the matrix would be, and check that all columns match that type
376+
int64_t nrow = RowCount();
377+
const EidosDictionaryHashTable *symbols = DictionarySymbols();
378+
int64_t ncol = symbols->size();
379+
EidosValue_SP type_template;
380+
const EidosClass *class_template = nullptr;
381+
382+
if (ncol == 0)
383+
{
384+
type_template = gStaticEidosValue_Logical_ZeroVec; // with no columns, we have no way to know the type, so we go with "logical", following R
385+
}
386+
else
387+
{
388+
for (auto symbols_iter : *symbols)
389+
{
390+
if (!type_template)
391+
{
392+
type_template = symbols_iter.second;
393+
if (type_template->Type() == EidosValueType::kValueObject)
394+
class_template = ((EidosValue_Object *)(type_template.get()))->Class();
395+
}
396+
else if (type_template->Type() != symbols_iter.second->Type())
397+
{
398+
EIDOS_TERMINATION << "ERROR (EidosDataFrame::ExecuteMethod_asMatrix): asMatrix() requires that every column of the target DataFrame is the same type (" << type_template->Type() << " != " << symbols_iter.second->Type() << ")." << EidosTerminate(nullptr);
399+
}
400+
else if (class_template)
401+
{
402+
const EidosClass *class_column = ((EidosValue_Object *)(symbols_iter.second.get()))->Class();
403+
404+
if (class_template != class_column)
405+
EIDOS_TERMINATION << "ERROR (EidosDataFrame::ExecuteMethod_asMatrix): asMatrix() requires that every object element in the target DataFrame is the same class (" << class_template->ClassName() << " != " << class_column->ClassName() << ")." << EidosTerminate(nullptr);
406+
}
407+
}
408+
}
409+
410+
// Create the matrix; for now we use a slow implementation that is type-agnostic and does not resize to fit first, probably this is unlikely to be a bottleneck
411+
//int64_t data_count = nrow * ncol;
412+
EidosValue_SP result_SP = type_template->NewMatchingType();
413+
EidosValue *result = result_SP.get();
414+
415+
//result_SP->resize_no_initialize(data_count);
416+
417+
// Fill in all the values, in sorted column order
418+
const std::vector<std::string> *keys = SortedKeys();
419+
420+
for (const std::string &key : *keys)
421+
{
422+
auto key_iter = symbols->find(key);
423+
424+
if (key_iter == symbols->end())
425+
EIDOS_TERMINATION << "ERROR (EidosDataFrame::ExecuteMethod_asMatrix): (internal error) key not found." << EidosTerminate(nullptr);
426+
427+
EidosValue *column_value = key_iter->second.get();
428+
429+
for (int64_t i = 0; i < nrow; ++i)
430+
result->PushValueFromIndexOfEidosValue((int)i, *column_value, nullptr);
431+
}
432+
433+
const int64_t dim_buf[2] = {nrow, ncol};
434+
435+
result_SP->SetDimensions(2, dim_buf);
436+
437+
return result_SP;
438+
}
439+
369440
// ********************* - (void)cbind(object source, ...)
370441
//
371442
EidosValue_SP EidosDataFrame::ExecuteMethod_cbind(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter)
@@ -1124,6 +1195,7 @@ const std::vector<EidosMethodSignature_CSP> *EidosDataFrame_Class::Methods(void)
11241195
{
11251196
methods = new std::vector<EidosMethodSignature_CSP>(*super::Methods());
11261197

1198+
methods->emplace_back((EidosInstanceMethodSignature *)(new EidosInstanceMethodSignature(gEidosStr_asMatrix, kEidosValueMaskAny)));
11271199
methods->emplace_back((EidosInstanceMethodSignature *)(new EidosInstanceMethodSignature(gEidosStr_cbind, kEidosValueMaskVOID))->AddObject("source", nullptr)->AddEllipsis());
11281200
methods->emplace_back((EidosInstanceMethodSignature *)(new EidosInstanceMethodSignature(gEidosStr_rbind, kEidosValueMaskVOID))->AddObject("source", nullptr)->AddEllipsis());
11291201
methods->emplace_back((EidosInstanceMethodSignature *)(new EidosInstanceMethodSignature(gEidosStr_subset, kEidosValueMaskAny))

eidos/eidos_class_DataFrame.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class EidosDataFrame : public EidosDictionaryRetained
6464
virtual EidosValue_SP GetProperty(EidosGlobalStringID p_property_id) override;
6565

6666
virtual EidosValue_SP ExecuteInstanceMethod(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter) override;
67+
EidosValue_SP ExecuteMethod_asMatrix(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
6768
EidosValue_SP ExecuteMethod_cbind(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
6869
EidosValue_SP ExecuteMethod_rbind(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
6970
EidosValue_SP ExecuteMethod_subset(EidosGlobalStringID p_method_id, const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);

eidos/eidos_globals.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2732,6 +2732,7 @@ const std::string &gEidosStr_colNames = EidosRegisteredString("colNames", gEidos
27322732
const std::string &gEidosStr_dim = EidosRegisteredString("dim", gEidosID_dim);
27332733
const std::string &gEidosStr_ncol = EidosRegisteredString("ncol", gEidosID_ncol);
27342734
const std::string &gEidosStr_nrow = EidosRegisteredString("nrow", gEidosID_nrow);
2735+
const std::string &gEidosStr_asMatrix = EidosRegisteredString("asMatrix", gEidosID_asMatrix);
27352736
const std::string &gEidosStr_cbind = EidosRegisteredString("cbind", gEidosID_cbind);
27362737
const std::string &gEidosStr_rbind = EidosRegisteredString("rbind", gEidosID_rbind);
27372738
const std::string &gEidosStr_subset = EidosRegisteredString("subset", gEidosID_subset);

eidos/eidos_globals.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,7 @@ extern const std::string &gEidosStr_colNames;
907907
extern const std::string &gEidosStr_dim;
908908
extern const std::string &gEidosStr_ncol;
909909
extern const std::string &gEidosStr_nrow;
910+
extern const std::string &gEidosStr_asMatrix;
910911
extern const std::string &gEidosStr_cbind;
911912
extern const std::string &gEidosStr_rbind;
912913
extern const std::string &gEidosStr_subset;
@@ -1028,6 +1029,7 @@ enum _EidosGlobalStringID : uint32_t
10281029
gEidosID_dim,
10291030
gEidosID_ncol,
10301031
gEidosID_nrow,
1032+
gEidosID_asMatrix,
10311033
gEidosID_cbind,
10321034
gEidosID_rbind,
10331035
gEidosID_subset,

eidos/eidos_test_functions_other.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,18 @@ void _RunClassTests(std::string temp_path)
11481148
EidosAssertScriptSuccess_L("x = DataFrame('b', 1:3, 'a', c(T,F,T)); identical(x.ncol, 2);", true);
11491149
EidosAssertScriptSuccess_L("x = DataFrame('b', 1:3, 'a', c(T,F,T)); identical(x.nrow, 3);", true);
11501150

1151+
// DataFrame asMatrix()
1152+
EidosAssertScriptRaise("x = DataFrame('a', 1:3, 'b', c(T,F,T)); x.asMatrix();", 42, "is the same type (logical != integer)");
1153+
EidosAssertScriptRaise("x = DataFrame('a', DataFrame(), 'b', Dictionary()); x.asMatrix();", 54, "is the same class (Dictionary != DataFrame)");
1154+
EidosAssertScriptSuccess_L("x = DataFrame('a', 1:5, 'b', 11:15); m1 = x.asMatrix(); m2 = matrix(c(1:5, 11:15), ncol=2, byrow=F); identical(m1, m2);", true);
1155+
EidosAssertScriptSuccess_L("x = DataFrame('b', 1:5, 'a', 11:15); m1 = x.asMatrix(); m2 = matrix(c(1:5, 11:15), ncol=2, byrow=F); identical(m1, m2);", true);
1156+
EidosAssertScriptSuccess_L("x = DataFrame('b', 11:15, 'a', 1:5); m1 = x.asMatrix(); m2 = matrix(c(11:15, 1:5), ncol=2, byrow=F); identical(m1, m2);", true);
1157+
EidosAssertScriptSuccess_L("x = DataFrame('a', 11:15, 'b', 1:5); m1 = x.asMatrix(); m2 = matrix(c(11:15, 1:5), ncol=2, byrow=F); identical(m1, m2);", true);
1158+
EidosAssertScriptSuccess_L("x = DataFrame('b', 11.0:15, 'a', 1.0:5); m1 = x.asMatrix(); m2 = matrix(c(11.0:15, 1.0:5), ncol=2, byrow=F); identical(m1, m2);", true);
1159+
EidosAssertScriptSuccess_L("x = DataFrame('b', c('foo','bar'), 'a', c('baz','barbaz')); m1 = x.asMatrix(); m2 = matrix(c('foo','bar','baz','barbaz'), ncol=2, byrow=F); identical(m1, m2);", true);
1160+
EidosAssertScriptSuccess_L("x = DataFrame('b', c(T,T,F), 'a', c(F,T,F)); m1 = x.asMatrix(); m2 = matrix(c(T,T,F,F,T,F), ncol=2, byrow=F); identical(m1, m2);", true);
1161+
EidosAssertScriptSuccess_L("d1 = Dictionary('foo', 1:8); d2 = Dictionary('baz', 11:18); x = DataFrame('b', d1, 'a', d2); m1 = x.asMatrix(); m2 = matrix(c(d1, d2), ncol=2, byrow=F); identical(m1, m2);", true);
1162+
11511163
// DataFrame cbind()
11521164
EidosAssertScriptSuccess_L("x = DataFrame('b', 1:3, 'a', c(T,F,T)); y = DataFrame(); y.cbind(x); y.identicalContents(x);", true);
11531165
EidosAssertScriptSuccess_L("x = DataFrame('b', 1:3, 'a', c(T,F,T)); y = DataFrame('c', 2.0:4); y.cbind(x); DataFrame('c', 2.0:4, 'b', 1:3, 'a', c(T,F,T)).identicalContents(y);", true);

0 commit comments

Comments
 (0)