Skip to content

Commit 6d549b9

Browse files
committed
initial commit of new figure outlines
1 parent 47e9df8 commit 6d549b9

7 files changed

Lines changed: 446 additions & 96 deletions
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
##figure ploting functions incorporates and standardizes the calls made across figures
2+
3+
library(ggplot2)
4+
library(dplyr)
5+
library(ggridges)
6+
library(synapser)
7+
8+
##COLORS: standardize here
9+
modelcolors <- c()
10+
datasetcolors <- c()
11+
12+
exvivo = c('mpnst','beataml','sarcpdo','pancpdo','bladderpdo')
13+
14+
synapser::synLogin()
15+
16+
##Download per-model performance scores from Synapse and keep only
##cross-dataset results (src != trg) whose training source is not ex vivo.
##Returns a data frame with the columns of the score csvs plus `model`
##and `withinDataset`.
getModelPerformanceData <- function(){

  ##Synapse ids of the score tables, one per model (version with pancpdo).
  ##Previous (pre-pancpdo) ids were: deepttc syn65880080, graphdrp
  ##syn65928973, lgbm syn65880116, pathdsp syn65880133, uno syn65676159
  ##(the old list was previously assigned then immediately overwritten).
  allscoreslist <- list(deepttc = 'syn66323471',graphdrp = 'syn66323492',lgbm = 'syn66323510',pathdsp = 'syn66326173',uno = 'syn66323527')

  ##read each model's scores and tag rows with the model name
  fullres <- do.call(rbind,lapply(names(allscoreslist),function(mod)
    readr::read_csv(synapser::synGet(allscoreslist[[mod]])$path) |> mutate(model = mod)))

  ##flag rows trained and evaluated on the same dataset
  ##(direct comparison; ifelse(cond, TRUE, FALSE) was redundant)
  fullres <- fullres |>
    mutate(withinDataset = src == trg)

  #lets remove same-dataset data
  cdres <- subset(fullres,!withinDataset)

  ##lets remove ex vivo training sources
  cdres <- subset(cdres,!src %in% c('mpnst','beataml'))

  return(cdres)
}
37+
38+
39+
40+
###these files are very big so i'm not sure how to deal with them.
##Download per-model prediction tables from Synapse.
##dset: character vector of model names to pull (default 'lgbm'); each
##name must be a key of the `preds` list below.
##Returns a data frame of predictions with a `model` column added.
getModelPredictionData<-function(dset='lgbm'){

  ##Synapse ids of the prediction files, one per model
  preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809')

  ##read only the requested models and tag each row with its model name
  fullres <- do.call(rbind,lapply(dset,function(mod)
    readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod)))

  ##bug fix: previously returned `preds` (the static synapse-id list)
  ##instead of the assembled prediction table
  return(fullres)
}
51+
52+
#this function plots a single metric by all the possible values
#metric: name of the metric to plot (e.g. 'pcc', 'scc', 'r2')
#dataset: performance data frame with met/value/src/trg/model columns
#prefix: currently unused; kept for call-site compatibility
#returns a list of two ggplot ridgeline plots: src (faceted by source)
#and trg (faceted by target)
ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){

  sr <- dataset |>
    subset(met == metric)

  ##facet by source - compare performance across a single source

  ##re-rank src samples by mean metrics
  ##(note: ranking uses the unclamped values; r2 clamping happens below)
  mvals <- sr |> group_by(src) |>
    summarize(mvals = mean(value)) |>
    arrange(mvals)

  ##clamp r2 at -1 so extreme negatives don't dominate the x axis
  ##(pmax replaces the equivalent sapply/ifelse loop)
  if (metric == 'r2') {
    sr$value <- pmax(sr$value, -1)
  }

  sr$src = factor(sr$src,levels = mvals$src)

  #compare models by source dataset
  p1 <- sr |>
    ggplot(aes(x = value,y = trg,fill = model)) +
    ggridges::geom_density_ridges(alpha = 0.5) +
    facet_grid(src~.) +
    ggtitle(paste0(metric,' by source dataset'))

  ##now we rerank by target dataset and evaluate by target
  mvals <- sr |> group_by(trg) |>
    summarize(mvals = mean(value)) |>
    arrange(mvals)
  sr$trg = factor(sr$trg,levels = mvals$trg)

  #plot source by target data
  p3 <- sr |>
    ggplot(aes(x = value,y = src,fill = model)) +
    ggridges::geom_density_ridges(alpha = 0.5) +
    facet_grid(trg~.) +
    ggtitle(paste0(metric,' by target dataset'))

  return(list(src=p1,trg=p3))
}
95+
96+
97+
##here we have to interrogate the results to visualize how specific drugs are behaving
##TODO: not yet implemented - body is empty, so calling this returns NULL
performanceByDrugOrSample<-function(){

}
101+
102+
103+
##do we still need this function?
##Plot histograms of a single metric, faceted model x target, and save
##one png for ex vivo targets and one for cell line targets.
##metric: metric name to plot; dataset: performance data frame.
##Side effect: writes <metric>exVivoPerformance.png and
##<metric>CellLinePerformance.png via ggsave.
doModelPlot <- function(metric, dataset=cdres){

  sr <- dataset |>
    subset(met == metric)
  ##re-rank trg samples by mean metric (comment previously said src,
  ##but the code groups by trg)
  mvals <- sr |>
    group_by(trg) |>
    summarize(mvals = mean(value)) |>
    arrange(mvals)

  ##clamp r2 at -1 (pmax replaces the equivalent sapply/ifelse loop)
  if(metric == 'r2') {
    sr$value <- pmax(sr$value, -1)
  }

  sr$trg = factor(sr$trg,levels = mvals$trg)

  ##ex vivo targets; the plot is assigned and passed to ggsave
  ##explicitly - inside a function the plot is never printed, so
  ##relying on ggsave's last-plot default was unreliable
  p_exvivo <- sr |>
    subset(trg %in% exvivo) |>
    ggplot(aes(x = value,alpha = 0.8)) +
    geom_histogram() + facet_grid(model~trg) +
    ggtitle(paste0(metric,' evaluated on ex vivo data'))

  ggsave(paste0(metric,'exVivoPerformance.png'), plot = p_exvivo)

  ##cell line (non ex vivo) targets
  p_cell <- sr |>
    subset(!trg %in% exvivo) |>
    ggplot(aes(x = value,alpha = 0.8)) +
    geom_histogram() + facet_grid(model~trg) +
    ggtitle(paste0(metric,' evaluated on cell line data'))

  ggsave(paste0(metric,'CellLinePerformance.png'), plot = p_cell)
}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
---
2+
title: "IMPROVE cell line results"
3+
author: "Sara Gosline"
4+
date: "2025-03-27"
5+
output: html_document
6+
---
7+
8+
This document focuses on results we can glean from the cell line specific analysis using the IMPROVE framework and coderdata
9+
```{r setup, include = FALSE}
# configure knitr and load the shared figure-plotting helpers
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(ggplot2)
source('coderdataResultsFunctions.R')

```
16+
17+
## Collect cross-model performance data and try out plotting
18+
Currently the cross-model data results have been uploaded to synapse. Please request access on the [synapse team site](https://www.synapse.org/Team:3545388).
19+
20+
```{r data, message=FALSE, echo=FALSE, warning=FALSE}
cdres <- getModelPerformanceData()

metrics <- c('pcc','scc')
res <- lapply(metrics,function(x) {
  # bug fix: `doFullPlot` is not defined anywhere; the helper defined in
  # coderdataResultsFunctions.R with this signature is ridgelineMetricPlots
  res <- ridgelineMetricPlots(x,cdres)
  pg <- cowplot::plot_grid(res$src,res$trg)
  # bug fix: the file name referenced undefined `metric` (the lapply
  # variable is `x`); also pass the grid explicitly to ggsave
  ggsave(paste0('all_',x,'_ridglines.pdf'),height = 12,width = 10, plot = pg)
  return(res$src)
})

print(res)

```
34+
35+
## Figure 3A: performance on cell lines
36+
37+
First result: evaluation on cell lines.
38+
39+
```{r model performance, message = FALSE, warning = FALSE}
#current list of ex vivo datasets. include liverpdo when complete

# keep only cell line evaluation targets
ccdres <- subset(cdres,!trg %in% exvivo)

ccres = lapply(metrics,function(x) {
  # bug fix: `doFullPlot` is not defined; use ridgelineMetricPlots from
  # coderdataResultsFunctions.R
  res <- ridgelineMetricPlots(x, ccdres)
  pg <- cowplot::plot_grid(res$src,res$trg)
  # bug fix: previously used undefined `metric` instead of `x`; pass
  # the grid explicitly to ggsave
  ggsave(paste0('celllines',x,'_ridglines.pdf'),height = 8,width = 10, plot = pg)
  return(res$src)
})

print(ccres)

```
55+
56+
## Figure 3B - training/test set size
57+
58+
## Compare dataset size to performance
59+
60+
We wonder if the dataset size affects the predictive power.
61+
62+
```{r dataset size}

#number of combos (drug/sample measurement pairs) per source dataset
combos = list(beataml = 3033,ccle = 10911,ctrpv2 = 303520,fimm = 2457 ,gcsi = 12320,
              gdscv1 = 105808,gdscv2 = 45323,mpnst = 250, nci60 = 2317205,prism = 633169)

numsamples = list()
numdrugs = list()
#todo: we can also evaluate number of samples or drugs

#we can get performance summaries per source/target/model
gres <- ccdres |>
  subset(model!='uno')|>
  subset(met=='scc') |>
  group_by(met,src,trg,model) |>
  summarize(meanVal=mean(value)) |>
  left_join(data.frame(src = names(combos),sampleNum = unlist(combos))) |>
  arrange(meanVal)

# mean-of-means per source, used to order the x axis below
mom <- gres|>group_by(src,sampleNum)|>summarize(mv=mean(meanVal))|>arrange(mv)

#gres <- subset(gres,met=='scc')
gres$src = factor(gres$src,levels=unique(mom$src))
gres |>
  ggplot(aes(x=src,y=meanVal,fill=model))+geom_boxplot()#+geom_jitter()

```
90+
91+
92+
# Dataset prediction parsing
93+
Now we have to go into the individual predictions to pull out trends
94+
95+
## Figure 3
96+
97+
What does this figure look like
98+
99+
100+
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
title: "Figure 4 ex vivo results"
3+
author: "Sara Gosline"
4+
date: "2025-06-02"
5+
output: html_document
6+
---
7+
8+
```{r setup, include=FALSE}
# global knitr configuration for this document
knitr::opts_chunk$set(echo = TRUE)
```
11+
12+
Here we are focused on getting the details from a model prediction algorithm - where did it fail, where did it succeed?
13+
14+
## Data and packages
15+
16+
First we get the packages loaded and logged into synapse.
17+
```{r packages, include = FALSE}
# bug fix: this chunk was also labeled `setup`, and duplicate chunk
# labels make knitr abort rendering - relabeled to `packages`
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(ggplot2)
source('coderdataResultsFunctions.R')

```
24+
25+
26+
The data has been uploaded by Natasha and can be downloaded as follows.
27+
28+
```{r download data}

cdres <- getModelPerformanceData()

# keep only ex vivo evaluation targets (`exvivo` comes from
# coderdataResultsFunctions.R)
ecdres <- subset(cdres,trg %in% exvivo)


```
36+
37+
## Figure 4A
38+
39+
40+
```{r}
# bug fix: `metrics` was never defined in this document (only in the
# cell line notebook), so define it here
metrics <- c('pcc','scc')
exres = lapply(metrics,function(x) {
  # bug fix: `doFullPlot` is not defined; use ridgelineMetricPlots from
  # coderdataResultsFunctions.R
  ridgelineMetricPlots(x, ecdres,'cellline')
})

print(exres)
```
47+
## Create function to dive in
48+
49+
50+
51+
```{r}
52+
53+
```
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
---
2+
title: "Figure 5 protein comparison"
3+
output: html_notebook
4+
---
5+
6+
The last figure is what we want to do to compare multiple omics measurements
7+
8+
```{r}
# configure knitr and load the shared figure-plotting helpers
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(ggplot2)
source('coderdataResultsFunctions.R')

```
15+
16+
17+
18+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
---
2+
title: "Improve results interrogation"
3+
author: "Sara Gosline"
4+
date: "2025-06-02"
5+
output: html_document
6+
---
7+
8+
```{r setup, include=FALSE}
# global knitr configuration for this document
knitr::opts_chunk$set(echo = TRUE)
```
11+
12+
Here we are focused on getting the details from a model prediction algorithm - where did it fail, where did it succeed?
13+
14+
## Data and packages
15+
16+
First we get the packages loaded and logged into synapse.
17+
```{r packages, include = FALSE}
# bug fix: this chunk was also labeled `setup`, and duplicate chunk
# labels make knitr abort rendering - relabeled to `packages`
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(synapser)
library(ggplot2)
library(ggridges)
# log into synapse so downloads in later chunks work
synapser::synLogin()

```
26+
27+
28+
The data has been uploaded by Natasha and can be downloaded as follows.
29+
30+
```{r download data}
31+
32+
33+
```
34+
35+
36+
37+
## Create function to dive in
38+
39+
40+
41+
```{r}
42+
43+
```

0 commit comments

Comments
 (0)