Skip to content

Commit 91b06ed

Browse files
added confusion matrix (to be tested with v24!), tiny text changes
1 parent 2b89ce7 commit 91b06ed

1 file changed

Lines changed: 49 additions & 7 deletions

File tree

notebooks/collections_demos/bonemarrowwsi_pediatricleukemia.ipynb

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,16 +101,19 @@
101101
},
102102
{
103103
"cell_type": "code",
104-
"execution_count": 2,
104+
"execution_count": null,
105105
"metadata": {
106106
"id": "dgtRNVatzl2s"
107107
},
108108
"outputs": [],
109109
"source": [
110110
"import os\n",
111+
"import numpy as np\n",
112+
"import pandas as pd\n",
113+
"import matplotlib.pyplot as plt\n",
114+
"import seaborn as sns\n",
111115
"import highdicom as hd\n",
112116
"from idc_index import IDCClient\n",
113-
"import pandas as pd\n",
114117
"from google.cloud import storage\n",
115118
"from pathlib import Path\n",
116119
"from typing import List, Union"
@@ -11390,13 +11393,13 @@
1139011393
},
1139111394
"source": [
1139211395
"## How to use the `BoneMarrowWSI-PediatricLeukemia` annotations\n",
11393-
"The `BoneMarrowWSI-PediatricLeukemia` collection stands out due to the extensive amount of information contained in its annotations. More than 40000 cells are annotated with bounding boxes suitable for training **cell detection models**, 28000 of those additionally received expert-generated class labels for **cell type classification** tasks. Particularly noteworthy is the uncertainty information embedded in the consensus labelling process, giving insight into which cell types are particularly challenging to determine or easy to confuse with others.\n",
11394-
"In the cell below, we catch some of those cases:"
11396+
"The `BoneMarrowWSI-PediatricLeukemia` collection stands out due to the large amount of information contained in its annotations. More than 40000 cells are annotated with bounding boxes suitable for training **cell detection models**, and 28000 of those additionally received expert-generated class labels for **cell type classification** tasks. Particularly noteworthy is the uncertainty information embedded in the consensus labelling process, which gives insight into which cell types are particularly challenging to determine or easy to confuse with others.\n",
11397+
"In the cells below, we collect some of those cases and display them in a confusion matrix:"
1139511398
]
1139611399
},
1139711400
{
1139811401
"cell_type": "code",
11399-
"execution_count": 33,
11402+
"execution_count": null,
1140011403
"metadata": {
1140111404
"colab": {
1140211405
"base_uri": "https://localhost:8080/",
@@ -11629,11 +11632,50 @@
1162911632
}
1163011633
],
1163111634
"source": [
11632-
"grouped_cell_labels = sorted_cell_labels.groupby('cell_id').agg({'cell_label': list, 'cell_label_code_scheme': list,\n",
11635+
"# Note: this cell may run for 2-3 minutes\n",
11636+
"labeled_cells = get_cell_annotations(subset='labeled', ann_to_process=500)\n",
11637+
"grouped_cell_labels = labeled_cells.groupby('cell_id').agg({'cell_label': list, 'cell_label_code_scheme': list,\n",
1163311638
" 'reference_SOPInstanceUID': 'first',\n",
1163411639
" 'cell_coordinates': 'first'})\n",
1163511640
"uncertain = grouped_cell_labels['cell_label'].apply(lambda x: len(set(x)) > 1)\n",
11636-
"display(grouped_cell_labels[uncertain])"
11641+
"uncertain_cells = grouped_cell_labels[uncertain]\n",
11642+
"display(uncertain_cells)"
11643+
]
11644+
},
11645+
{
11646+
"cell_type": "code",
11647+
"execution_count": null,
11648+
"metadata": {},
11649+
"outputs": [],
11650+
"source": [
11651+
"# Flatten all label pairs for each uncertain cell\n",
11652+
"label_pairs = []\n",
11653+
"for labels in uncertain_cells['cell_label']:\n",
11654+
" unique_labels = list(set(labels))\n",
11655+
" if len(unique_labels) > 1:\n",
11656+
"        # Add every pairwise confusion; pairs are unordered, so sort each one into a canonical order\n",
11657+
" for i in range(len(unique_labels)):\n",
11658+
" for j in range(i+1, len(unique_labels)):\n",
11659+
" label_pairs.append(tuple(sorted([unique_labels[i], unique_labels[j]])))\n",
11660+
"\n",
11661+
"# Get all unique labels involved in confusion\n",
11662+
"all_confused_labels = sorted(set([l for pair in label_pairs for l in pair]))\n",
11663+
"label_to_idx = {label: idx for idx, label in enumerate(all_confused_labels)}\n",
11664+
"\n",
11665+
"# Build confusion matrix\n",
11666+
"conf_matrix = np.zeros((len(all_confused_labels), len(all_confused_labels)), dtype=int)\n",
11667+
"for l1, l2 in label_pairs:\n",
11668+
" i, j = label_to_idx[l1], label_to_idx[l2]\n",
11669+
" conf_matrix[i, j] += 1\n",
11670+
" conf_matrix[j, i] += 1 # symmetric\n",
11671+
"\n",
11672+
"# Plot confusion matrix\n",
11673+
"plt.figure(figsize=(10, 8))\n",
11674+
"sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Reds', xticklabels=all_confused_labels, yticklabels=all_confused_labels)\n",
11675+
"plt.title('Confusion Matrix of Uncertain Cell Labels')\n",
11676+
"plt.xlabel('Cell Label')\n",
11677+
"plt.ylabel('Cell Label')\n",
11678+
"plt.show()"
1163711679
]
1163811680
},
1163911681
{

0 commit comments

Comments
 (0)