@@ -14,14 +14,16 @@ First we get the packages loaded and logged into synapse.
1414knitr::opts_chunk$set(echo = TRUE)
1515library(tidyverse)
1616library(ggplot2)
17+ library(arrow)
18+ library(dplyr)
19+ library(gridExtra)
1720source('coderdataResultsFunctions.R')
18-
1921```
2022
2123
2224The data has been uploaded by natasha and can be downloaded as follows.
2325
24- ``` {r download data}
26+ ``` {r download data, eval }
2527
2628 cdres <- getModelPerformanceData()
2729
@@ -33,7 +35,7 @@ The data has been uploaded by natasha and can be downloaded as follows.
3335## Figure 4A
3436
3537
36- ``` {r}
38+ ``` {r, eval=FALSE }
3739metrics <- c('scc','pcc')
3840exres = lapply(metrics,function(x) {
3941 res<-ridgelineMetricPlots(x, ecdres,'cellline')
@@ -47,7 +49,7 @@ print(exres)
4749
4850Now we can confirm that the datasets follow the same pattern.
4951
50- ``` {r dataset samples}
52+ ``` {r dataset samples, eval=FALSE }
5153
5254
5355plot<-calcSourceStatistics('scc',ecdres)
@@ -61,7 +63,107 @@ print(plot)
6163## Create funtion to dive in
6264
6365
66+ ``` {r}
67+ tgt = 'ccle'
68+
69+ all_preds <- do.call(
70+ rbind,
71+ lapply(
72+ models,
73+ function(mdl) getModelPredictionData(dset = mdl) |>
74+ dplyr::filter(target == tgt & source != tgt & source != 'beataml' & source != 'mpnst') |>
75+ collect()
76+ )
77+ )
78+ ```
79+ ``` {r}
80+ plot_panel <- function(data, title){
81+ data <- sample_n(data, 10000)
82+ plot <- (
83+ ggplot(data, aes(x=auc_pred, y=auc_true))
84+ + geom_point()
85+ + geom_smooth(method=lm)
86+ + facet_grid(source ~ model)
87+ + ggtitle(title)
88+ # + xlim(0, 1)
89+ # + ylim(0, 1.25)
90+ )
91+ }
92+ ```
93+ ``` {r}
94+ all_preds <- all_preds |> mutate(auc_ranges = cut(auc_true, c(-Inf, 0.2, 0.8, Inf), labels = c('auc_true <= 0.2', '0.2 < auc_true <= 0.8', 'auc_true > 0.8')))
6495
96+ ```
6597``` {r}
98+ ranges <- list('auc_true <= 0.2', '0.2 < auc_true <= 0.8', 'auc_true > 0.8')
99+ plots <- lapply(ranges, function(auc_range){
100+ data <- all_preds |> filter(auc_ranges == auc_range) |> collect()
101+ plot_panel(data, auc_range)
102+ })
103+ plot <- arrangeGrob(grobs = plots, ncol = 3)
104+ ggsave('ccle_auc_plot.pdf', plot, dpi=300, width=30, height=10)
105+ ```
106+
107+ Full model predictions are stored on synapse as parquet files. Individual
108+ datasets can be downloaded via ` getModelPredictionData ` in
109+ ` coderdataResultsFunctions.R ` (sources during the setup process).
110+
111+ Here we download data from all models, subset to only MPNST target predictions
112+ and combine the individual subsets into one master table.
113+ ``` {r predictions data import}
114+
115+ tgt = 'mpnst'
66116
117+ all_preds <- do.call(
118+ rbind,
119+ lapply(
120+ models,
121+ function(mdl) getModelPredictionData(dset = mdl) |>
122+ dplyr::filter(target == tgt & source != tgt & source != 'beataml') |>
123+ collect()
124+ )
125+ )
126+
127+ ```
128+
129+ We want to group results by drugs i.e. create a panel per drug. To that end we
130+ extract the drugs and determine the "grid layout" by size of target drugs.
131+ ``` {r}
132+
133+ drugs <- unique(all_preds$improve_chem_id)
134+ grid_ncol = 4
135+ grid_nrow = ceiling(length(drugs) / grid_ncol)
136+ ```
137+
138+ Defining the plot function for the individual "panels"
139+ ``` {r}
140+ plot_panel <- function(data, title){
141+ plot <- (
142+ ggplot(data, aes(x=auc_true, y=auc_pred))
143+ + geom_point()
144+ + geom_smooth(method=lm)
145+ + facet_grid(source ~ model)
146+ + ggtitle(title)
147+ + xlim(0, 1)
148+ + ylim(0, 1.25)
149+ )
150+ }
151+
152+ ```
153+
154+ ``` {r}
155+ plots <- lapply(drugs, function(drug_id){
156+ data <- all_preds |> filter(improve_chem_id == drug_id) |> collect()
157+ plot_panel(data, drug_id)
158+ })
159+ ```
160+
161+ ``` {r}
162+ plot <- arrangeGrob(grobs = plots, ncol = grid_ncol, nrow = grid_nrow)
163+ ```
164+
165+
166+ ``` {r}
167+ # print(plot)
168+ ggsave('mpnst_auc_plot.pdf', plot, dpi=300, width=40, height=60, limitsize = FALSE)
67169```
0 commit comments