@@ -4,12 +4,23 @@ library(ggplot2)
44library(dplyr )
55library(ggridges )
66library(synapser )
7-
7+ library( RColorBrewer )
88# #COLORS: standardize here
9- modelcolors <- c()
10- datasetcolors <- c()
119
12- exvivo = c(' mpnst' ,' beataml' ,' sarcpdo' ,' pancpdo' ,' bladderpdo' )
## Model colors: one entry per prediction model, named by model so that the
## manual ggplot scales (values = modelcolors) can match on the model name.
## The palette size is taken from the name vector so the two always agree;
## assigning fewer names than colors would leave an NA-named entry behind.
modelnames <- c('deepttc', 'graphdrp', 'lgbm', 'pathdsp', 'uno')
modelcolors <- RColorBrewer::brewer.pal(n = length(modelnames), name = 'RdYlBu')
names(modelcolors) <- modelnames

## Dataset names, split into the two groups that get separate palettes.
exvivo <- c('mpnst', 'beataml', 'sarcpdo', 'pancpdo', 'bladderpdo', 'liverpdo')
cellline <- c('nci60', 'ctrpv2', 'fimm', 'gcsi', 'gdscv1', 'gdscv2', 'prism',
              'ccle')

## One named color per cell line dataset.
ccols <- RColorBrewer::brewer.pal(n = length(cellline), name = 'RdBu')
names(ccols) <- cellline

## One named color per ex vivo dataset.
ecols <- RColorBrewer::brewer.pal(n = length(exvivo), name = 'PRGn')
names(ecols) <- exvivo

## Combined lookup: dataset name -> color, cell lines first.
datasetcolors <- c(ccols, ecols)
1324
1425synapser :: synLogin()
1526
@@ -38,11 +49,10 @@ getModelPerformanceData <- function(){
3849
3950
4051# ##these files are very big so i'm not sure how to deal with them.
41- getModelPredictionData <- function (dset = ' lgbm' ){
52+ getModelPredictionData <- function (dset = ' lgbm' ) {
4253
4354 preds <- list (deepttc = ' syn68149793' , graphdrp = ' syn68146828' , lgbm = ' syn68149807' , pathdsp = ' syn66772452' , uno = ' syn68149809' )
4455
45-
4656 fullres <- do.call(rbind ,lapply(dset ,function (mod )
4757 readr :: read_csv(synapser :: synGet(preds [[mod ]])$ path ) | > mutate(model = mod )))
4858
@@ -75,7 +85,8 @@ ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){
7585 ggplot(aes(x = value ,y = trg ,fill = model )) +
7686 ggridges :: geom_density_ridges(alpha = 0.5 ) +
7787 facet_grid(src ~ . ) +
78- ggtitle(paste0(metric ,' by source dataset' ))
88+ ggtitle(paste0(metric ,' by source dataset' ))+
89+ scale_fill_manual(values = modelcolors )
7990
8091 # #now we rerank by target dataset and evaluate by target
8192 mvals <- sr | > group_by(trg ) | >
@@ -88,17 +99,70 @@ ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){
8899 ggplot(aes(x = value ,y = src ,fill = model )) +
89100 ggridges :: geom_density_ridges(alpha = 0.5 ) +
90101 facet_grid(trg ~ . ) +
91- ggtitle(paste0(metric ,' by target dataset' ))
102+ ggtitle(paste0(metric ,' by target dataset' ))+
103+ scale_fill_manual(values = modelcolors )
104+
92105
93106 return (list (src = p1 ,trg = p3 ))
94107}
95108
96109
97110# #here we have to interrogate the results to visualize how specific drugs are behaving
98- performanceByDrugOrSample <- function (){
performanceByDrugOrSample <- function() {
  ## Not implemented yet: there is no analysis code here, so a call simply
  ## returns NULL.
  NULL
}
101114
## Calculate source dataset statistics: how do features of the source dataset
## impact performance?
##
## For the requested metric, the mean value is computed for every
## source/target/model combination and annotated with the size of the source
## dataset (samples, drugs, sample-drug combinations). Three scatter plots show
## mean performance against each size statistic on a log10 x axis with a
## per-model linear fit; a fourth panel shows, per model, the correlation
## between each size statistic and mean performance.
##
## Arguments:
##   metric  - value of the `met` column to analyse
##   dataset - long-format results with columns met, src, trg, model, value
##             (defaults to the global `cdres`)
## Returns:
##   a 2x2 cowplot grid (samples, drugs, combinations, correlations).
## Colors are read from the global `modelcolors` vector.
calcSourceStatistics <- function(metric, dataset = cdres) {
  ## hard-coded size of each source dataset
  combos <- c(ccle = 10911, ctrpv2 = 303520, fimm = 2457, gcsi = 12320,
              gdscv1 = 105808, gdscv2 = 45323, nci60 = 2317205, prism = 633169)
  numsamples <- c(ccle = 503, ctrpv2 = 847, fimm = 52, gcsi = 571,
                  gdscv1 = 984, gdscv2 = 806, nci60 = 83, prism = 478)
  numdrugs <- c(ccle = 24, ctrpv2 = 461, fimm = 52, gcsi = 43, gdscv1 = 296,
                gdscv2 = 169, nci60 = 54707, prism = 1418)

  ## index every vector by name so the columns line up even if the literals
  ## above are ever listed in a different order
  srcs <- names(numsamples)
  stats <- data.frame(
    src = srcs,
    Samples = unname(numsamples[srcs]),
    Drugs = unname(numdrugs[srcs]),
    Combos = unname(combos[srcs])
  )

  ## mean performance per source/target/model, annotated with source size;
  ## `!!metric` forces the function argument even if `dataset` happens to
  ## have a column called `metric`
  gres <- dataset |>
    dplyr::filter(met == !!metric) |>
    group_by(met, src, trg, model) |>
    summarize(meanVal = mean(value, na.rm = TRUE), .groups = 'drop') |>
    left_join(stats, by = 'src') |>
    arrange(meanVal)

  if (nrow(gres) == 0) {
    warning("no rows in `dataset` have met == '", metric,
            "'; the resulting plots will be empty", call. = FALSE)
  }

  ## scatter of mean performance against one size statistic
  sizePlot <- function(xvar) {
    ggplot(gres, aes(x = .data[[xvar]], y = meanVal, col = model)) +
      geom_point() +
      scale_x_log10() +
      scale_color_manual(values = modelcolors) +
      geom_smooth(method = 'lm', formula = y ~ x, alpha = 0.2) +
      xlab(xvar) +
      theme_bw()
  }

  p1 <- sizePlot('Samples')
  p2 <- sizePlot('Drugs')
  p3 <- sizePlot('Combos')

  ## same NA handling for all three statistics, so a source without size
  ## information (NA after the join) cannot blank out only some of the bars
  sizeCor <- function(size, perf) {
    cor(size, perf, use = 'pairwise.complete.obs')
  }

  corvals <- gres |>
    group_by(model) |>
    summarize(Sample = sizeCor(Samples, meanVal),
              Drugs = sizeCor(Drugs, meanVal),
              Combinations = sizeCor(Combos, meanVal),
              .groups = 'drop') |>
    tidyr::pivot_longer(cols = -model, names_to = 'statistic',
                        values_to = 'correlation')

  p4 <- ggplot(corvals, aes(x = statistic, y = correlation, fill = model)) +
    geom_col(position = 'dodge') +
    scale_fill_manual(values = modelcolors) +
    theme_bw()

  cowplot::plot_grid(p1, p2, p3, p4, nrow = 2)
}
102166
103167# #do we still need this function?
104168
0 commit comments