Skip to content

Commit add5b21

Browse files
committed
updated to include standard colors and basic plots
1 parent 7a1c4bd commit add5b21

3 files changed

Lines changed: 109 additions & 36 deletions

File tree

manuscript/coderdataResultsFunctions.R

Lines changed: 73 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,23 @@ library(ggplot2)
44
library(dplyr)
55
library(ggridges)
66
library(synapser)
7-
7+
library(RColorBrewer)
88
##COLORS: standardize here
9-
modelcolors <- c()
10-
datasetcolors <- c()
119

12-
exvivo = c('mpnst','beataml','sarcpdo','pancpdo','bladderpdo')
10+
## Model palette: one color per model, looked up by model name in the
## scale_*_manual(values = modelcolors) calls below.
## The 6-class RdYlBu palette is kept so the existing five colors do not change,
## but only the first five entries are retained: assigning five names to a
## six-element vector would leave a sixth color whose name is NA.
modelcolors <- RColorBrewer::brewer.pal(n=6,name='RdYlBu')[1:5]
names(modelcolors) <- c('deepttc','graphdrp','lgbm','pathdsp','uno')
12+
13+
14+
exvivo = c('mpnst','beataml','sarcpdo','pancpdo','bladderpdo','liverpdo')
15+
cellline = c('nci60','ctrpv2','fimm','gcsi','gdscv1','gdscv2','prism','ccle')
16+
17+
ccols = RColorBrewer::brewer.pal(n=length(cellline),name='RdBu')
18+
names(ccols) <- cellline
19+
20+
ecols = RColorBrewer::brewer.pal(n=length(exvivo),name='PRGn')
21+
names(ecols) <- exvivo
22+
23+
datasetcolors <- c(ccols,ecols)
1324

1425
synapser::synLogin()
1526

@@ -38,11 +49,10 @@ getModelPerformanceData <- function(){
3849

3950

4051
###these files are very big so i'm not sure how to deal with them.
41-
getModelPredictionData<-function(dset='lgbm'){
52+
getModelPredictionData <- function(dset='lgbm') {
4253

4354
preds <- list(deepttc = 'syn68149793', graphdrp = 'syn68146828', lgbm = 'syn68149807', pathdsp = 'syn66772452', uno = 'syn68149809')
4455

45-
4656
fullres <- do.call(rbind,lapply(dset,function(mod)
4757
readr::read_csv(synapser::synGet(preds[[mod]])$path) |> mutate(model = mod)))
4858

@@ -75,7 +85,8 @@ ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){
7585
ggplot(aes(x = value,y = trg,fill = model)) +
7686
ggridges::geom_density_ridges(alpha = 0.5) +
7787
facet_grid(src~.) +
78-
ggtitle(paste0(metric,' by source dataset'))
88+
ggtitle(paste0(metric,' by source dataset'))+
89+
scale_fill_manual(values=modelcolors)
7990

8091
##now we rerank by target dataset and evaluate by target
8192
mvals <- sr |> group_by(trg) |>
@@ -88,17 +99,70 @@ ridgelineMetricPlots <- function(metric,dataset=cdres, prefix='all'){
8899
ggplot(aes(x = value,y = src,fill = model)) +
89100
ggridges::geom_density_ridges(alpha = 0.5) +
90101
facet_grid(trg~.) +
91-
ggtitle(paste0(metric,' by target dataset'))
102+
ggtitle(paste0(metric,' by target dataset'))+
103+
scale_fill_manual(values=modelcolors)
104+
92105

93106
return(list(src=p1,trg=p3))
94107
}
95108

96109

97110
##here we have to interrogate the results to visualize how specific drugs are behaving
98-
performanceByDrugOrSample<-function(){
111+
performanceByDrugOrSample <- function(){
99112

100113
}
101114

115+
## calculate source dataset statistics
## how do features of the source dataset impact performance?
##
## Averages one performance metric per (met, src, trg, model) and relates it to
## hard-coded size statistics (samples, drugs, drug/sample combinations) of the
## source dataset.
##
## Arguments:
##   metric  - value of the `met` column to keep (e.g. 'scc').
##   dataset - table with at least the columns met, src, trg, model and value;
##             defaults to the global `cdres`.
## Returns: a 2-row cowplot grid holding three scatter plots (mean metric value
##   vs. Samples, Drugs and Combos on a log10 x axis, with a per-model linear
##   fit) and one bar plot of the per-model correlations.
## Uses the file-level `modelcolors` palette for all color/fill scales.
calcSourceStatistics <- function(metric, dataset = cdres) {
  # number of combos
  combos <- c(ccle = 10911, ctrpv2 = 303520, fimm = 2457, gcsi = 12320,
              gdscv1 = 105808, gdscv2 = 45323, nci60 = 2317205, prism = 633169)

  numsamples <- c(ccle = 503, ctrpv2 = 847, fimm = 52, gcsi = 571, gdscv1 = 984,
                  gdscv2 = 806, nci60 = 83, prism = 478)
  numdrugs <- c(ccle = 24, ctrpv2 = 461, fimm = 52, gcsi = 43, gdscv1 = 296,
                gdscv2 = 169, nci60 = 54707, prism = 1418)

  # Align the three vectors by dataset name instead of relying on all of them
  # being typed in the same order.
  srcnames <- names(combos)
  stats <- data.frame(
    src = srcnames,
    Samples = unname(numsamples[srcnames]),
    Drugs = unname(numdrugs[srcnames]),
    Combos = unname(combos[srcnames]),
    stringsAsFactors = FALSE
  )

  # todo: we can also evaluate number of samples or drugs

  # we can get performance summaries
  gres <- dataset |>
    # subset(model != 'uno') |>
    subset(met == metric) |>
    group_by(met, src, trg, model) |>
    summarize(meanVal = mean(value, na.rm = TRUE), .groups = 'drop') |>
    # explicit key: never join on another column that happens to share a name
    left_join(stats, by = 'src') |>
    arrange(meanVal)

  # order source datasets by their average performance
  mom <- gres |>
    group_by(src, Combos) |>
    summarize(mv = mean(meanVal), .groups = 'drop') |>
    arrange(mv)

  # gres <- subset(gres, met == 'scc')
  gres$src <- factor(gres$src, levels = unique(mom$src))

  # One scatter plot of mean performance against a single size statistic.
  sizeScatter <- function(xvar) {
    ggplot(gres, aes(x = .data[[xvar]], y = meanVal, col = model)) +
      geom_point() +
      scale_x_log10() +
      scale_color_manual(values = modelcolors) +
      geom_smooth(method = lm, alpha = 0.2) +
      theme_bw() +
      xlab(xvar)
  }

  p1 <- sizeScatter('Samples')
  p2 <- sizeScatter('Drugs')
  p3 <- sizeScatter('Combos')

  # Sources without an entry in `stats` get NA sizes from the join; use
  # pairwise-complete observations for every statistic so that one such row
  # does not turn a whole correlation into NA.
  corvals <- gres |>
    group_by(model) |>
    summarize(
      Sample = cor(Samples, meanVal, use = 'pairwise.complete.obs'),
      Drugs = cor(Drugs, meanVal, use = 'pairwise.complete.obs'),
      Combinations = cor(Combos, meanVal, use = 'pairwise.complete.obs'),
      .groups = 'drop'
    ) |>
    tidyr::pivot_longer(cols = -model, names_to = 'statistic',
                        values_to = 'correlation')

  p4 <- ggplot(corvals, aes(x = statistic, y = correlation, fill = model)) +
    geom_bar(position = 'dodge', stat = 'identity') +
    scale_fill_manual(values = modelcolors) +
    theme_bw()

  return(cowplot::plot_grid(p1, p2, p3, p4, nrow = 2))
}
102166

103167
##do we still need this function?
104168

manuscript/figure3CellLinePlots.Rmd

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ Currently the cross-model data results have been uploaded to synapse. Please req
2222
2323
metrics <- c('pcc','scc')
2424
res <- lapply(metrics,function(x) {
25-
res <- doFullPlot(x,cdres)
25+
res <- ridgelineMetricPlots(x,cdres)
2626
cowplot::plot_grid(res$src,res$trg)
27-
ggsave(paste0('all_',metric,'_ridglines.pdf'),height = 12,width = 10)
27+
ggsave(paste0('all_',x,'_ridglines.pdf'),height = 12,width = 10)
2828
return(res$src)
2929
})
3030
@@ -43,9 +43,9 @@ First result: evaluation on cell lines.
4343
ccdres <- subset(cdres,!trg %in% exvivo)
4444
4545
ccres = lapply(metrics,function(x) {
46-
res <- doFullPlot(x, ccdres)
46+
res <- ridgelineMetricPlots(x, ccdres)
4747
cowplot::plot_grid(res$src,res$trg)
48-
ggsave(paste0('celllines',metric,'_ridglines.pdf'),height = 8,width = 10)
48+
ggsave(paste0('celllines',x,'_ridglines.pdf'),height = 8,width = 10)
4949
return(res$src)
5050
})
5151
@@ -61,40 +61,36 @@ We wonder if the dataset size affects the predictive power.
6161

6262
```{r dataset size}
6363
64-
#number of combos
65-
combos = list(beataml = 3033,ccle = 10911,ctrpv2 = 303520,fimm = 2457 ,gcsi = 12320,
66-
gdscv1 = 105808,gdscv2 = 45323,mpnst = 250, nci60 = 2317205,prism = 633169)
64+
plot <- calcSourceStatistics('scc',ccdres)
6765
68-
numsamples = list()
69-
numdrugs = list()
70-
#todo: we can also evaluate number of samples or drugs
66+
ggsave('cellLineSamplePerformanceCorrelation.pdf', plot,height=12)
7167
72-
#e can get performance summaries
73-
gres <- ccdres |>
74-
subset(model!='uno')|>
75-
subset(met=='scc') |>
76-
group_by(met,src,trg,model) |>
77-
summarize(meanVal=mean(value)) |>
78-
left_join(data.frame(src = names(combos),sampleNum = unlist(combos))) |>
79-
arrange(meanVal)
68+
```
69+
70+
71+
# Dataset prediction parsing
72+
Now we have to go into the individual predictions to pull out trends
73+
74+
## Figure 3C
8075

81-
mom <- gres|>group_by(src,sampleNum)|>summarize(mv=mean(meanVal))|>arrange(mv)
76+
First we can compare actual AUCs to predictive power
8277

83-
#gres <- subset(gres,met=='scc')
84-
gres$src = factor(gres$src,levels=unique(mom$src))
85-
gres |>
86-
ggplot(aes(x=src,y=meanVal,fill=model))+geom_boxplot()#+geom_jitter()
78+
```{r auc calculation}
8779
80+
##parse data, plot results
8881
8982
```
9083

9184

92-
# Dataset prediction parsing
93-
Now we have to go into the individual predidictions to pull out trends
85+
## Figure 3D
86+
87+
Compare drug sample performance
9488

95-
## Figure 3
89+
are there better performing drugs/samples?
9690

97-
What does this figure look like
91+
```{r sample/drug performance}
92+
93+
```
9894

9995

10096

manuscript/figure4ExVivoResults.Rmd

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,19 @@ exres = lapply(metrics,function(x) {
4444
4545
print(exres)
4646
```
47+
48+
Now we can confirm that the datasets follow the same pattern.
49+
50+
```{r dataset samples}
51+
52+
53+
plot<-calcSourceStatistics('scc',ecdres)
54+
55+
56+
ggsave('exVivoSamplePerformanceCorrelation.pdf',plot, height=12)
57+
58+
```
59+
4760
## Create function to dive in
4861

4962

0 commit comments

Comments
 (0)