Skip to content

Commit bc9140e

Browse files
committed
added parquet file handling
1 parent b028af9 commit bc9140e

1 file changed

Lines changed: 20 additions & 6 deletions

File tree

manuscript/coderdataResultsFunctions.R

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
library(ggplot2)
44
library(dplyr)
5+
library(arrow)
56
library(ggridges)
67
library(synapser)
78
library(RColorBrewer)
@@ -67,18 +68,31 @@ getModelPerformanceData <- function(){
6768
}
6869

6970

70-
71-
###these files are very big so I'm not sure how to deal with them.
71+
# this currently only retrieves one dataset at a time and returns an Apache
72+
# "arrow" tabular dataset object that can be interacted with / queried via dplyr
7273
getModelPredictionData <- function(dset='lgbm') {
7374

74-
preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809')
75+
preds <- list(lgbm = 'syn68176033')
7576

76-
fullres <- do.call(rbind,lapply(dset,function(mod)
77-
readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod)))
77+
dataset <- arrow::open_dataset(
78+
sources = synapser::synGet(preds[[dset]])$path,
79+
format = "parquet"
80+
)
7881

79-
return(preds)
82+
return(dataset)
8083
}
8184

85+
###these files are very big so I'm not sure how to deal with them.
86+
# getModelPredictionData <- function(dset='lgbm') {
87+
#
88+
# preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809')
89+
#
90+
# fullres <- do.call(rbind,lapply(dset,function(mod)
91+
# readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod)))
92+
#
93+
# return(preds)
94+
# }
95+
8296
#this function plots a single metric by all the possible values
8397
#
8498
ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){

0 commit comments

Comments
 (0)