Skip to content

Commit 4dd67fa

Browse files
committed
Add in figure plotting in Rmd
1 parent 64d08aa commit 4dd67fa

1 file changed

Lines changed: 146 additions & 0 deletions

File tree

manuscript/improveResultVis.Rmd

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
---
2+
title: "IMPROVE benchmark results"
3+
author: "Sara gosline"
4+
date: "2025-03-27"
5+
output: html_document
6+
---
7+
8+
This document describes basic analysis to carry out from the cross-study analysis work that the IMPROVE team has produced. The results are currently stored in Synapse and we illustrate how to visualize them.
9+
10+
```{r setup, include=FALSE}
11+
knitr::opts_chunk$set(echo = TRUE)
12+
library(tidyverse)
13+
library(synapser)
14+
library(ggplot2)
15+
library(ggridges)
16+
synapser::synLogin()
17+
18+
```
19+
20+
## Collect data from synapse
21+
Natasha uploaded all the results so far to synapse, let's download them systematically into a single table with the model as a new column.
22+
23+
```{r data}
24+
25+
#allscoreslist<-list(deepttc='syn65676079',graphdrp='syn65676103',lgbm='syn65676119',pathdsp='syn65676139',uno='syn65676159')
26+
#new scores with extra data
27+
allscoreslist<-list(deepttc='syn65880080',graphdrp='syn65928973',lgbm='syn65880116',pathdsp='syn65880133',uno='syn65676159')
28+
29+
##with pancpdo
30+
allscoreslist<-list(deepttc='syn66323471',graphdrp='syn66323492',lgbm='syn66323510',pathdsp='syn66326173',uno='syn66323527')
31+
32+
fullres<-do.call(rbind,lapply(names(allscoreslist),function(mod)
33+
readr::read_csv(synapser::synGet(allscoreslist[[mod]])$path)|>mutate(model=mod)))
34+
35+
```
36+
37+
We load all the data into a single table for comparison. Then we can experiment with plotting.
38+
39+
## Comparison plotting
40+
41+
We plot each model by metric and save the results in a file.
42+
43+
```{r model comparison,warning=FALSE, message=FALSE}
44+
45+
doFullPlot<-function(metric){
46+
sr<-fullres|>
47+
subset(met==metric)
48+
##re-rank src samples by mean metric
49+
mvals<-sr|>group_by(src)|>
50+
summarize(mvals=mean(value))|>
51+
arrange(mvals)
52+
53+
if(metric=='r2'){
54+
sr$value<-sapply(sr$value,function (x) ifelse(x<(-1),-1,x))
55+
}
56+
57+
sr$src=factor(sr$src,levels=mvals$src)
58+
59+
p<-sr|>
60+
ggplot(aes(x=value,y=model,fill=trg))+
61+
ggridges::geom_density_ridges(alpha=0.5)+
62+
facet_grid(src~.)+
63+
ggtitle(paste0(metric,' by source dataset'))
64+
65+
p1<-sr|>
66+
ggplot(aes(x=value,y=trg,fill=model))+
67+
ggridges::geom_density_ridges(alpha=0.5)+
68+
facet_grid(src~.)+
69+
ggtitle(paste0(metric,' by source dataset'))
70+
71+
##now plot by target
72+
mvals<-sr|>group_by(trg)|>
73+
summarize(mvals=mean(value))|>
74+
arrange(mvals)
75+
sr$trg=factor(sr$trg,levels=mvals$trg)
76+
77+
p2<-sr|>
78+
ggplot(aes(x=value,y=model,fill=src))+
79+
ggridges::geom_density_ridges(alpha=0.5)+
80+
facet_grid(trg~.)+
81+
ggtitle(paste0(metric,' by target dataset'))
82+
83+
p3<-sr|>
84+
ggplot(aes(x=value,y=src,fill=model))+
85+
ggridges::geom_density_ridges(alpha=0.5)+
86+
facet_grid(trg~.)+
87+
ggtitle(paste0(metric,' by target dataset'))
88+
cowplot::plot_grid(p,p2)
89+
ggsave(paste0(metric,'ridglines.png'),height=12,width=10)
90+
cowplot::plot_grid(p1,p3)
91+
ggsave(paste0(metric,'model_ridglines.png'),height=14,width=14)
92+
cowplot::plot_grid(p1,p3)
93+
94+
}
95+
96+
lapply(unique(fullres$met),function(x) doFullPlot(x))
97+
98+
```
99+
100+
This shows all the results, but maybe we can plot more focused questions across datasets?
101+
102+
## Model system performance
103+
Which model system performs best across cell lines vs ex vivo? Update this as we get more PDO/PDX data.
104+
105+
```{r model performance, message=FALSE, warning=FALSE}
106+
exvivo=c('mpnst','beataml','sarcpdo','pancpdo','bladderpdo')
107+
doModelPlot<-function(metric){
108+
sr<-fullres|>
109+
subset(met==metric)
110+
##re-rank src samples by mean metric
111+
mvals<-sr|>group_by(trg)|>
112+
summarize(mvals=mean(value))|>
113+
arrange(mvals)
114+
115+
if(metric=='r2'){
116+
sr$value<-sapply(sr$value,function (x) ifelse(x<(-1),-1,x))
117+
}
118+
119+
sr$trg=factor(sr$trg,levels=mvals$trg)
120+
121+
sr|>subset(trg%in%exvivo)|>
122+
ggplot(aes(x=value,alpha=0.8))+
123+
geom_histogram()+facet_grid(model~trg)+
124+
ggtitle(paste0(metric,' evaluated on ex vivo data'))
125+
126+
127+
ggsave(paste0(metric,'exVivoPerformance.png'))
128+
129+
130+
sr|>subset(!trg%in%exvivo)|>
131+
ggplot(aes(x=value,alpha=0.8))+
132+
geom_histogram()+facet_grid(model~trg)+
133+
ggtitle(paste0(metric,' evaluated on cell line data'))
134+
135+
136+
ggsave(paste0(metric,'CellLinePerformance.png'))
137+
138+
}
139+
140+
141+
lapply(unique(fullres$met),function(x) doModelPlot(x))
142+
143+
```
144+
145+
146+

0 commit comments

Comments
 (0)