|
101 | 101 | }, |
102 | 102 | { |
103 | 103 | "cell_type": "code", |
104 | | - "execution_count": 2, |
| 104 | + "execution_count": null, |
105 | 105 | "metadata": { |
106 | 106 | "id": "dgtRNVatzl2s" |
107 | 107 | }, |
108 | 108 | "outputs": [], |
109 | 109 | "source": [ |
110 | 110 | "import os\n", |
| 111 | + "import numpy as np\n", |
| 112 | + "import pandas as pd\n", |
| 113 | + "import matplotlib.pyplot as plt\n", |
| 114 | + "import seaborn as sns\n", |
111 | 115 | "import highdicom as hd\n", |
112 | 116 | "from idc_index import IDCClient\n", |
113 | | - "import pandas as pd\n", |
114 | 117 | "from google.cloud import storage\n", |
115 | 118 | "from pathlib import Path\n", |
116 | 119 | "from typing import List, Union" |
@@ -11390,13 +11393,13 @@ |
11390 | 11393 | }, |
11391 | 11394 | "source": [ |
11392 | 11395 | "## How to use the `BoneMarrowWSI-PediatricLeukemia` annotations\n", |
11393 | | - "The `BoneMarrowWSI-PediatricLeukemia` collection stands out due to the extensive amount of information contained in its annotations. More than 40000 cells are annotated with bounding boxes suitable for training **cell detection models**, 28000 of those additionally received expert-generated class labels for **cell type classification** tasks. Particularly noteworthy is the uncertainty information embedded in the consensus labelling process, giving insight into which cell types are particularly challenging to determine or easy to confuse with others.\n", |
11394 | | - "In the cell below, we catch some of those cases:" |
| 11396 | + "The `BoneMarrowWSI-PediatricLeukemia` collection stands out due to the large amount of information contained in its annotations. More than 40000 cells are annotated with bounding boxes suitable for training **cell detection models**, and 28000 of those additionally received expert-generated class labels for **cell type classification** tasks. Particularly noteworthy is the uncertainty information embedded in the consensus labelling process, giving insight into which cell types are particularly challenging to determine or easy to confuse with others.\n", |
| 11397 | + "In the cells below, we collect the cells that received more than one distinct label and then display, in a confusion matrix, how often each pair of labels was assigned to the same cell:" |
11395 | 11398 | ] |
11396 | 11399 | }, |
11397 | 11400 | { |
11398 | 11401 | "cell_type": "code", |
11399 | | - "execution_count": 33, |
| 11402 | + "execution_count": null, |
11400 | 11403 | "metadata": { |
11401 | 11404 | "colab": { |
11402 | 11405 | "base_uri": "https://localhost:8080/", |
@@ -11629,11 +11632,50 @@ |
11629 | 11632 | } |
11630 | 11633 | ], |
11631 | 11634 | "source": [ |
11632 | | - "grouped_cell_labels = sorted_cell_labels.groupby('cell_id').agg({'cell_label': list, 'cell_label_code_scheme': list,\n", |
| 11635 | + "# Note: this cell may run for 2-3 minutes\n", |
| 11636 | + "labeled_cells = get_cell_annotations(subset='labeled', ann_to_process=500)\n", |
| 11637 | + "grouped_cell_labels = labeled_cells.groupby('cell_id').agg({'cell_label': list, 'cell_label_code_scheme': list,\n", |
11633 | 11638 | " 'reference_SOPInstanceUID': 'first',\n", |
11634 | 11639 | " 'cell_coordinates': 'first'})\n", |
11635 | 11640 | "uncertain = grouped_cell_labels['cell_label'].apply(lambda x: len(set(x)) > 1)\n", |
11636 | | - "display(grouped_cell_labels[uncertain])" |
| 11641 | + "uncertain_cells = grouped_cell_labels[uncertain]\n", |
| 11642 | + "display(uncertain_cells)" |
| 11643 | + ] |
| 11644 | + }, |
| 11645 | + { |
| 11646 | + "cell_type": "code", |
| 11647 | + "execution_count": null, |
| 11648 | + "metadata": {}, |
| 11649 | + "outputs": [], |
| 11650 | + "source": [ |
| 11651 | + "# Flatten all label pairs for each uncertain cell\n", |
| 11652 | + "label_pairs = []\n", |
| 11653 | + "for labels in uncertain_cells['cell_label']:\n", |
| 11654 | + " unique_labels = list(set(labels))\n", |
| 11655 | + " if len(unique_labels) > 1:\n", |
| 11656 | + " # Add all pairwise confusions (unordered, so sort)\n", |
| 11657 | + " for i in range(len(unique_labels)):\n", |
| 11658 | + " for j in range(i+1, len(unique_labels)):\n", |
| 11659 | + " label_pairs.append(tuple(sorted([unique_labels[i], unique_labels[j]])))\n", |
| 11660 | + "\n", |
| 11661 | + "# Get all unique labels involved in confusion\n", |
| 11662 | + "all_confused_labels = sorted(set([l for pair in label_pairs for l in pair]))\n", |
| 11663 | + "label_to_idx = {label: idx for idx, label in enumerate(all_confused_labels)}\n", |
| 11664 | + "\n", |
| 11665 | + "# Build confusion matrix\n", |
| 11666 | + "conf_matrix = np.zeros((len(all_confused_labels), len(all_confused_labels)), dtype=int)\n", |
| 11667 | + "for l1, l2 in label_pairs:\n", |
| 11668 | + " i, j = label_to_idx[l1], label_to_idx[l2]\n", |
| 11669 | + " conf_matrix[i, j] += 1\n", |
| 11670 | + " conf_matrix[j, i] += 1 # symmetric\n", |
| 11671 | + "\n", |
| 11672 | + "# Plot confusion matrix\n", |
| 11673 | + "plt.figure(figsize=(10, 8))\n", |
| 11674 | + "sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Reds', xticklabels=all_confused_labels, yticklabels=all_confused_labels)\n", |
| 11675 | + "plt.title('Confusion Matrix of Uncertain Cell Labels')\n", |
| 11676 | + "plt.xlabel('Cell Label')\n", |
| 11677 | + "plt.ylabel('Cell Label')\n", |
| 11678 | + "plt.show()" |
11637 | 11679 | ] |
11638 | 11680 | }, |
11639 | 11681 | { |
|
0 commit comments