Skip to content

Commit 7c7342c

Browse files
committed
Changed the 'mutations' branch of format() to use pd.pivot_table instead of pd.crosstab
1 parent 6bd844a commit 7c7342c

1 file changed

Lines changed: 9 additions & 6 deletions

File tree

coderdata/dataset/dataset.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -434,12 +434,16 @@ def format(
434434
raise ValueError(
435435
"'mutation_type' must be defined if 'data_type'=='mutations'"
436436
)
437-
tmp = data.mutations[
437+
tmp = deepcopy(data.mutations[
438438
data.mutations['variant_classification'] == mutation_type
439-
]
440-
ret = pd.crosstab(
441-
index=tmp['entrez_id'],
442-
columns=tmp['improve_sample_id']
439+
])
440+
tmp['exists'] = 1
441+
ret = pd.pivot_table(
442+
data=tmp,
443+
index='entrez_id',
444+
columns='improve_sample_id',
445+
values='exists',
446+
fill_value=0,
443447
)
444448

445449
elif data_type == "copy_number":
@@ -463,7 +467,6 @@ def format(
463467
f"'{data_type}' attribute of Dataset cannot be 'None'"
464468
)
465469

466-
# TODO: currently assumes that the proteomics table is properly formatted
467470
ret = pd.pivot_table(
468471
data=data.proteomics,
469472
values='proteomics',

0 commit comments

Comments
 (0)