@@ -16,7 +16,7 @@ library(tidyverse)
1616library(ggplot2)
1717library(arrow)
1818library(dplyr)
19-
19+ library(gridExtra)
2020source('coderdataResultsFunctions.R')
2121```
2222
@@ -63,6 +63,47 @@ print(plot)
6363## Create function to dive in
6464
6565
66+ ``` {r}
# Dataset whose predictions are inspected in this section.
tgt <- 'ccle'

# Pull the prediction table of every model, keep only the rows that target
# `tgt` and whose source is neither `tgt` itself nor 'beataml' / 'mpnst',
# then stack the per-model results row-wise into a single table.
all_preds <- do.call(
  rbind,
  lapply(models, function(mdl) {
    getModelPredictionData(dset = mdl) |>
      dplyr::filter(
        target == tgt,
        source != tgt,
        source != 'beataml',
        source != 'mpnst'
      ) |>
      collect()
  })
)
78+ ```
79+ ``` {r}
#' Scatter observed against predicted AUC, faceted by source and model.
#'
#' Draws a random subset of at most `max_points` rows of `data` as points,
#' overlays a linear-model smooth and facets the result by `source` (rows)
#' and `model` (columns).
#'
#' @param data Data frame with the columns `auc_pred`, `auc_true`, `source`
#'   and `model`.
#' @param title Plot title, handed to `ggtitle()`.
#' @param max_points Upper bound on the number of rows that are drawn.
#'   Defaults to 10000, the value that used to be hard-coded.
#' @return A ggplot object.
plot_panel <- function(data, title, max_points = 10000) {
  # sample_n() aborts when asked for more rows than `data` contains, so the
  # sample size is capped at the number of rows that are actually available.
  n_drawn <- min(nrow(data), max_points)
  data <- dplyr::slice_sample(data, n = n_drawn)

  # Axis limits are intentionally left unset for now:
  #   + xlim(0, 1)
  #   + ylim(0, 1.25)
  ggplot(data, aes(x = auc_pred, y = auc_true)) +
    geom_point() +
    geom_smooth(method = "lm") +
    facet_grid(source ~ model) +
    ggtitle(title)
}
92+ ```
93+ ``` {r}
# Bin the observed AUC into three labelled intervals (cut points 0.2 and 0.8)
# and store the resulting factor in the new column `auc_ranges`.
all_preds <- mutate(
  all_preds,
  auc_ranges = cut(
    auc_true,
    breaks = c(-Inf, 0.2, 0.8, Inf),
    labels = c(
      'auc_true <= 0.2',
      '0.2 < auc_true <= 0.8',
      'auc_true > 0.8'
    )
  )
)
95+
96+ ```
97+ ``` {r}
# One panel per AUC bin; the strings must match the labels stored in
# `all_preds$auc_ranges`.
ranges <- list(
  'auc_true <= 0.2',
  '0.2 < auc_true <= 0.8',
  'auc_true > 0.8'
)

# Build the panel for each bin from the rows that fall into it; the bin label
# doubles as the panel title.
plots <- lapply(ranges, function(range_label) {
  in_range <- all_preds |>
    filter(auc_ranges == range_label) |>
    collect()
  plot_panel(in_range, range_label)
})

# Put the three panels side by side and write them to a single PDF.
plot <- arrangeGrob(grobs = plots, ncol = 3)
ggsave(
  'ccle_auc_plot.pdf',
  plot,
  dpi = 300,
  width = 30,
  height = 10
)
105+ ```
106+
66107Full model predictions are stored on Synapse as parquet files. Individual
67108datasets can be downloaded via `getModelPredictionData` in
68109`coderdataResultsFunctions.R` (sourced during the setup process).
@@ -78,17 +119,51 @@ all_preds <- do.call(
78119 lapply(
79120 models,
80121 function(mdl) getModelPredictionData(dset = mdl) |>
81- dplyr::filter(target == tgt) |>
122+ dplyr::filter(target == tgt & source != tgt & source != 'beataml' ) |>
82123 collect()
83124 )
84125 )
85126
86127```
87128
129+ We want to group results by drug, i.e. create one panel per drug. To that end we
130+ extract the drugs and derive the grid layout from the number of distinct drugs.
131+ ``` {r}
88132
# Distinct drug identifiers present in the predictions; one panel is drawn
# for each of them.
drugs <- unique(all_preds$improve_chem_id)

# Fixed number of grid columns; the row count follows from the number of
# drugs, rounded up so that every drug gets a cell.
grid_ncol <- 4
grid_nrow <- ceiling(length(drugs) / grid_ncol)
136+ ```
137+
138+ Defining the plot function for the individual "panels"
89139``` {r}
90- plot <- ggplot(all_preds, aes(x=auc_true, y=auc_pred)) + geom_point() + geom_smooth(method=lm) + facet_grid(source ~ model)
91- print(plot)
92- ggsave('mpnst_auc_plot.pdf', plot, dpi=300, width=20, height=20)
#' Scatter predicted against observed AUC for a single panel.
#'
#' Plots `auc_true` (x) against `auc_pred` (y) as points with a linear-model
#' smooth, faceted by `source` (rows) and `model` (columns), on fixed axis
#' limits of 0..1 (x) and 0..1.25 (y).
#'
#' @param data Data frame with the columns `auc_true`, `auc_pred`, `source`
#'   and `model`.
#' @param title Plot title, handed to `ggtitle()`.
#' @return The ggplot object, returned invisibly.
plot_panel <- function(data, title) {
  # NOTE(review): xlim()/ylim() set scale limits, so observations outside
  # these ranges are presumably dropped before the smooth is fitted -- confirm
  # this is intended rather than a pure zoom.
  panel <- ggplot(data, aes(x = auc_true, y = auc_pred)) +
    geom_point() +
    geom_smooth(method = lm) +
    facet_grid(source ~ model) +
    ggtitle(title) +
    xlim(0, 1) +
    ylim(0, 1.25)
  invisible(panel)
}
151+
93152```
94153
154+ ``` {r}
# Build one panel per drug from the rows belonging to that drug; the drug
# identifier is used as the panel title.
plots <- lapply(drugs, function(chem_id) {
  drug_rows <- all_preds |>
    filter(improve_chem_id == chem_id) |>
    collect()
  plot_panel(drug_rows, chem_id)
})
159+ ```
160+
161+ ``` {r}
# Arrange the per-drug panels on a grid of `grid_nrow` x `grid_ncol` cells.
plot <- arrangeGrob(
  grobs = plots,
  nrow = grid_nrow,
  ncol = grid_ncol
)
163+ ```
164+
165+
166+ ``` {r}
# print(plot)
# Write the arranged grid to disk; `limitsize = FALSE` is needed because the
# requested 40 x 60 page exceeds ggsave's default size guard.
ggsave(
  'mpnst_auc_plot.pdf',
  plot,
  dpi = 300,
  width = 40,
  height = 60,
  limitsize = FALSE
)
169+ ```
0 commit comments