Skip to content

Commit 6864205

Browse files
authored
Minor tweaks to Sup Figure 8 (#59)
* update sup fig 8
* response to review
1 parent f99d92e commit 6864205

5 files changed

Lines changed: 275 additions & 238 deletions

File tree

7.figures/Figure3_model_evaluation.ipynb

Lines changed: 243 additions & 216 deletions
Large diffs are not rendered by default.
84.8 KB
Loading
23.7 KB
Loading

7.figures/nbconverted/Figure3_model_evaluation.r

Lines changed: 30 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,8 @@ cm_df <- readr::read_tsv(
2424
) %>%
2525
dplyr::select(!...1) %>%
2626
dplyr::filter(
27-
balance_type == "balanced"
27+
balance_type == "balanced",
28+
dataset_type == "ic"
2829
) %>%
2930
dplyr::group_by(True_Label, data_split, shuffled, balance_type, feature_type) %>%
3031
dplyr::mutate(
@@ -69,9 +70,9 @@ confusion_matrix_gg <- (
6970
+ theme_bw()
7071
+ phenotypic_ggplot_theme
7172
+ theme(
72-
axis.text.x = element_text(angle = 90, hjust = 1, size = 10),
73+
axis.text.x = element_text(angle = 90, hjust = 1, size = 11),
7374
axis.title = element_text(size = 13),
74-
axis.text.y = element_text(size = 10),
75+
axis.text.y = element_text(size = 11),
7576
strip.text = element_text(size = 12),
7677
legend.text = element_text(size = 12),
7778
legend.title = element_text(size = 14),
@@ -85,7 +86,7 @@ confusion_matrix_gg
8586
results_dir <- file.path(
8687
"..", "3.evaluate_model", "evaluations", "precision_recall_curves"
8788
)
88-
results_file <- file.path(results_dir, "compiled_class_PR_curves.tsv")
89+
results_file <- file.path(results_dir, "compiled_class_PR_curves.tsv.gz")
8990

9091
pr_df <- readr::read_tsv(
9192
results_file,
@@ -95,14 +96,16 @@ pr_df <- readr::read_tsv(
9596
"data_split" = "c",
9697
"shuffled" = "c",
9798
"feature_type" = "c",
98-
"balance_type" = "c"
99+
"balance_type" = "c",
100+
"dataset_type" = "c"
99101
)
100102
) %>%
101103
dplyr::select(!`...1`) %>%
102104
dplyr::mutate(feature_type_with_data_split = paste0(feature_type, data_split)) %>%
103105
dplyr::filter(
104-
balance_type == "balanced"
105-
)
106+
balance_type == "balanced",
107+
dataset_type == "ic"
108+
)
106109

107110
# Order feature types for plotting
108111
pr_df$feature_type <-
@@ -165,14 +168,16 @@ f1_score_df <- readr::read_tsv(
165168
"data_split" = "c",
166169
"shuffled" = "c",
167170
"feature_type" = "c",
168-
"balance_type" = "c"
171+
"balance_type" = "c",
172+
"dataset_type" = "c"
169173
)
170174
) %>%
171175
dplyr::select(!`...1`) %>%
172176
dplyr::mutate(feature_type_with_data_split = paste0(feature_type, data_split)) %>%
173177
dplyr::filter(
174178
data_split == "test",
175-
balance_type == "balanced"
179+
balance_type == "balanced",
180+
dataset_type == "ic"
176181
)
177182

178183
# Order feature types for plotting
@@ -190,7 +195,6 @@ new_order <- c(rev(remaining_levels), front_level)
190195
f1_score_df$Phenotypic_Class <-
191196
factor(f1_score_df$Phenotypic_Class, levels = new_order)
192197

193-
194198
print(dim(f1_score_df))
195199
head(f1_score_df)
196200

@@ -248,9 +252,9 @@ confusion_matrix_subset_gg <- (
248252
+ theme_bw()
249253
+ phenotypic_ggplot_theme
250254
+ theme(
251-
axis.text.x = element_text(angle = 90, hjust = 1, size = 10),
255+
axis.text.x = element_text(angle = 90, hjust = 1, size = 11),
252256
axis.title = element_text(size = 13),
253-
axis.text.y = element_text(size = 10),
257+
axis.text.y = element_text(size = 11),
254258
strip.text = element_text(size = 12),
255259
legend.text = element_text(size = 12),
256260
legend.title = element_text(size = 14),
@@ -300,19 +304,25 @@ pr_curve_subset_gg <- (
300304

301305
pr_curve_subset_gg
302306

307+
# Reorder test set features for plotting f1 score summary
308+
f1_score_df$feature_type_with_data_split <-
309+
factor(
310+
f1_score_df$feature_type_with_data_split,
311+
levels = c("CP_and_DPtest", "CPtest", "DPtest", "CP_areashape_onlytest", "CP_zernike_onlytest")
312+
)
313+
303314
f1_score_subset_gg <- (
304315
ggplot(
305-
f1_score_df %>%
306-
dplyr::filter(feature_type %in% !!subset_feature_spaces),
316+
f1_score_df,
307317
aes(x = Phenotypic_Class, y = F1_Score))
308318
+ geom_bar(aes(fill = feature_type_with_data_split), stat = "identity", position = "dodge")
309319
+ theme_bw()
310320
+ xlab("Single cell phenotype")
311321
+ ylab("F1 Score (Test set)")
312322
+ scale_fill_manual(
313323
name = "Model scenario",
314-
labels = subset_feature_type_with_data_split_labels,
315-
values = subset_feature_type_with_data_split_colors
324+
labels = c(subset_feature_type_with_data_split_labels, feature_type_with_data_split_labels),
325+
values = c(subset_feature_type_with_data_split_colors, feature_type_with_data_split_colors)
316326
)
317327
+ phenotypic_ggplot_theme
318328
# Decrease spacing in legend and rotate text
@@ -340,20 +350,20 @@ fig_3_gg <- (
340350
bottom_plot
341351
) + plot_annotation(tag_levels = "A") + plot_layout(heights = c(1, 0.6))
342352

343-
ggsave(output_main_figure_3, dpi = 500, height = 14, width = 14)
353+
ggsave(output_main_figure_3, dpi = 500, height = 14, width = 15)
344354

345355
fig_3_gg
346356

347357
bottom_plot <- (
348358
pr_curve_subset_gg |
349359
f1_score_subset_gg
350-
) + plot_layout(widths = c(3, 0.55))
360+
) + plot_layout(widths = c(3, 0.7))
351361

352362
sup_fig_3_gg <- (
353363
wrap_elements(confusion_matrix_subset_gg) /
354364
bottom_plot
355-
) + plot_annotation(tag_levels = "A") + plot_layout(heights = c(1, 0.6))
365+
) + plot_annotation(tag_levels = "A") + plot_layout(heights = c(1, 0.5))
356366

357-
ggsave(output_sup_figure_subset, dpi = 500, height = 14, width = 14)
367+
ggsave(output_sup_figure_subset, dpi = 500, height = 14, width = 15)
358368

359369
sup_fig_3_gg

7.figures/themes.r

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,10 +115,10 @@ feature_type_with_data_split_labels <- c(
115115

116116
subset_feature_type_with_data_split_colors <- c(
117117
"CP_areashape_onlytest" = "#1f78b4",
118-
"CP_zernike_onlytest" = "#7beb1a",
118+
"CP_zernike_onlytest" = "#F00699",
119119

120120
"CP_areashape_onlytrain" = "#aacee6",
121-
"CP_zernike_onlytrain" = "#b2df8a"
121+
"CP_zernike_onlytrain" = "#DA8EBE"
122122
)
123123

124124
subset_feature_type_with_data_split_labels <- c(

0 commit comments

Comments
 (0)