|
2 | 2 |
|
3 | 3 | library(ggplot2) |
4 | 4 | library(dplyr) |
| 5 | +library(arrow) |
5 | 6 | library(ggridges) |
6 | 7 | library(synapser) |
7 | 8 | library(RColorBrewer) |
@@ -67,18 +68,31 @@ getModelPerformanceData <- function(){ |
67 | 68 | } |
68 | 69 |
|
69 | 70 |
|
70 | | - |
71 | | -###these files are very big so i'm not sure how to deal with them. |
| 71 | +# this currently only retrieves one dataset at a time and returns an Apache |
| 72 | +# "arrow" tabular dataset object that can be interacted with / queried via dplyr |
72 | 73 | getModelPredictionData <- function(dset='lgbm') { |
73 | 74 |
|
74 | | - preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809') |
| 75 | + preds <- list(lgbm = 'syn68176033') |
75 | 76 |
|
76 | | - fullres <- do.call(rbind,lapply(dset,function(mod) |
77 | | - readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod))) |
| 77 | + dataset <- arrow::open_dataset( |
| 78 | + sources = synapser::synGet(preds[[dset]])$path, |
| 79 | + format = "parquet" |
| 80 | + ) |
78 | 81 |
|
79 | | - return(preds) |
| 82 | + return(dataset) |
80 | 83 | } |
81 | 84 |
|
| 85 | +### These files are very big, so I'm not sure how to deal with them. |
| 86 | +# getModelPredictionData <- function(dset='lgbm') { |
| 87 | +# |
| 88 | +# preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809') |
| 89 | +# |
| 90 | +# fullres <- do.call(rbind,lapply(dset,function(mod) |
| 91 | +# readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod))) |
| 92 | +# |
| 93 | +# return(preds) |
| 94 | +# } |
| 95 | + |
82 | 96 | #this function plots a single metric by all the possible values |
83 | 97 | # |
84 | 98 | ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){ |
|
0 commit comments