@@ -24,7 +24,8 @@ cm_df <- readr::read_tsv(
2424) %> %
2525 dplyr :: select(! ... 1 ) %> %
2626 dplyr :: filter(
27- balance_type == " balanced"
27+ balance_type == " balanced" ,
28+ dataset_type == " ic"
2829 ) %> %
2930 dplyr :: group_by(True_Label , data_split , shuffled , balance_type , feature_type ) %> %
3031 dplyr :: mutate(
@@ -69,9 +70,9 @@ confusion_matrix_gg <- (
6970 + theme_bw()
7071 + phenotypic_ggplot_theme
7172 + theme(
72- axis.text.x = element_text(angle = 90 , hjust = 1 , size = 10 ),
73+ axis.text.x = element_text(angle = 90 , hjust = 1 , size = 11 ),
7374 axis.title = element_text(size = 13 ),
74- axis.text.y = element_text(size = 10 ),
75+ axis.text.y = element_text(size = 11 ),
7576 strip.text = element_text(size = 12 ),
7677 legend.text = element_text(size = 12 ),
7778 legend.title = element_text(size = 14 ),
@@ -85,7 +86,7 @@ confusion_matrix_gg
8586results_dir <- file.path(
8687 " .." , " 3.evaluate_model" , " evaluations" , " precision_recall_curves"
8788)
88- results_file <- file.path(results_dir , " compiled_class_PR_curves.tsv" )
89+ results_file <- file.path(results_dir , " compiled_class_PR_curves.tsv.gz " )
8990
9091pr_df <- readr :: read_tsv(
9192 results_file ,
@@ -95,14 +96,16 @@ pr_df <- readr::read_tsv(
9596 " data_split" = " c" ,
9697 " shuffled" = " c" ,
9798 " feature_type" = " c" ,
98- " balance_type" = " c"
99+ " balance_type" = " c" ,
100+ " dataset_type" = " c"
99101 )
100102) %> %
101103 dplyr :: select(! `...1` ) %> %
102104 dplyr :: mutate(feature_type_with_data_split = paste0(feature_type , data_split )) %> %
103105 dplyr :: filter(
104- balance_type == " balanced"
105- )
106+ balance_type == " balanced" ,
107+ dataset_type == " ic"
108+ )
106109
107110# Order feature types for plotting
108111pr_df $ feature_type <-
@@ -165,14 +168,16 @@ f1_score_df <- readr::read_tsv(
165168 " data_split" = " c" ,
166169 " shuffled" = " c" ,
167170 " feature_type" = " c" ,
168- " balance_type" = " c"
171+ " balance_type" = " c" ,
172+ " dataset_type" = " c"
169173 )
170174) %> %
171175 dplyr :: select(! `...1` ) %> %
172176 dplyr :: mutate(feature_type_with_data_split = paste0(feature_type , data_split )) %> %
173177 dplyr :: filter(
174178 data_split == " test" ,
175- balance_type == " balanced"
179+ balance_type == " balanced" ,
180+ dataset_type == " ic"
176181 )
177182
178183# Order feature types for plotting
@@ -190,7 +195,6 @@ new_order <- c(rev(remaining_levels), front_level)
190195f1_score_df $ Phenotypic_Class <-
191196 factor (f1_score_df $ Phenotypic_Class , levels = new_order )
192197
193-
194198print(dim(f1_score_df ))
195199head(f1_score_df )
196200
@@ -248,9 +252,9 @@ confusion_matrix_subset_gg <- (
248252 + theme_bw()
249253 + phenotypic_ggplot_theme
250254 + theme(
251- axis.text.x = element_text(angle = 90 , hjust = 1 , size = 10 ),
255+ axis.text.x = element_text(angle = 90 , hjust = 1 , size = 11 ),
252256 axis.title = element_text(size = 13 ),
253- axis.text.y = element_text(size = 10 ),
257+ axis.text.y = element_text(size = 11 ),
254258 strip.text = element_text(size = 12 ),
255259 legend.text = element_text(size = 12 ),
256260 legend.title = element_text(size = 14 ),
@@ -300,19 +304,25 @@ pr_curve_subset_gg <- (
300304
301305pr_curve_subset_gg
302306
307+ # Reorder test set features for plotting f1 score summary
308+ f1_score_df $ feature_type_with_data_split <-
309+ factor (
310+ f1_score_df $ feature_type_with_data_split ,
311+ levels = c(" CP_and_DPtest" , " CPtest" , " DPtest" , " CP_areashape_onlytest" , " CP_zernike_onlytest" )
312+ )
313+
303314f1_score_subset_gg <- (
304315 ggplot(
305- f1_score_df %> %
306- dplyr :: filter(feature_type %in% !! subset_feature_spaces ),
316+ f1_score_df ,
307317 aes(x = Phenotypic_Class , y = F1_Score ))
308318 + geom_bar(aes(fill = feature_type_with_data_split ), stat = " identity" , position = " dodge" )
309319 + theme_bw()
310320 + xlab(" Single cell phenotype" )
311321 + ylab(" F1 Score (Test set)" )
312322 + scale_fill_manual(
313323 name = " Model scenario" ,
314- labels = subset_feature_type_with_data_split_labels ,
315- values = subset_feature_type_with_data_split_colors
324+ labels = c( subset_feature_type_with_data_split_labels , feature_type_with_data_split_labels ) ,
325+ values = c( subset_feature_type_with_data_split_colors , feature_type_with_data_split_colors )
316326 )
317327 + phenotypic_ggplot_theme
318328 # Decrease spacing in legend and rotate text
@@ -340,20 +350,20 @@ fig_3_gg <- (
340350 bottom_plot
341351) + plot_annotation(tag_levels = " A" ) + plot_layout(heights = c(1 , 0.6 ))
342352
343- ggsave(output_main_figure_3 , dpi = 500 , height = 14 , width = 14 )
353+ ggsave(output_main_figure_3 , dpi = 500 , height = 14 , width = 15 )
344354
345355fig_3_gg
346356
347357bottom_plot <- (
348358 pr_curve_subset_gg |
349359 f1_score_subset_gg
350- ) + plot_layout(widths = c(3 , 0.55 ))
360+ ) + plot_layout(widths = c(3 , 0.7 ))
351361
352362sup_fig_3_gg <- (
353363 wrap_elements(confusion_matrix_subset_gg ) /
354364 bottom_plot
355- ) + plot_annotation(tag_levels = " A" ) + plot_layout(heights = c(1 , 0.6 ))
365+ ) + plot_annotation(tag_levels = " A" ) + plot_layout(heights = c(1 , 0.5 ))
356366
357- ggsave(output_sup_figure_subset , dpi = 500 , height = 14 , width = 14 )
367+ ggsave(output_sup_figure_subset , dpi = 500 , height = 14 , width = 15 )
358368
359369sup_fig_3_gg
0 commit comments