Skip to content

Commit 3727800

Browse files
committed
Added draft of Jupyter Notebook outlining basic functionality of the new stats module
1 parent 03bb4f6 commit 3727800

1 file changed

Lines changed: 122 additions & 0 deletions

File tree

notebooks/dataset_statistics.ipynb

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# IPython magic to autoreload modules every time code is executed, so that changes to the code are propagated\n",
10+
"%load_ext autoreload\n",
11+
"%autoreload 2"
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": 8,
17+
"metadata": {},
18+
"outputs": [],
19+
"source": [
20+
"import coderdata as cd\n",
21+
"from coderdata.utils.stats import summarize_response_metric\n",
22+
"from coderdata.utils.stats import plot_response_metric\n",
23+
"\n",
24+
"import matplotlib.pyplot as plt\n",
25+
"import math"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 9,
31+
"metadata": {},
32+
"outputs": [],
33+
"source": [
34+
"dataset_prefix = 'beataml'"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": null,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"cd.download_data_by_prefix(dataset_prefix)"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {},
50+
"outputs": [],
51+
"source": [
52+
"data = cd.DatasetLoader(dataset_prefix)\n",
53+
"summary_stats = summarize_response_metric(data=data)\n",
54+
"summary_stats"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 14,
60+
"metadata": {},
61+
"outputs": [],
62+
"source": [
63+
"metrics = summary_stats.index.values"
64+
]
65+
},
66+
{
67+
"cell_type": "code",
68+
"execution_count": 23,
69+
"metadata": {},
70+
"outputs": [],
71+
"source": [
72+
"ncol = 3\n",
73+
"nrow = math.ceil(len(metrics)/ncol)"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(ncol*3, nrow*3))\n",
83+
"\n",
84+
"k = 0\n",
85+
"for i in range(0, nrow):\n",
86+
" for j in range(0, ncol):\n",
87+
" if k < len(metrics):\n",
88+
" plot_response_metric(data=data, metric=metrics[k], bins=10, ax=axs[i, j])\n",
89+
" else:\n",
90+
" axs[i, j].axis('off')\n",
91+
" k += 1\n",
92+
"\n",
93+
"fig.set_layout_engine('tight')\n",
94+
"fig.suptitle(f'Distribution of drug response values in \"{dataset_prefix}\"')\n",
95+
"\n",
96+
"# uncomment next line to save plot\n",
97+
"# fig.savefig(f'{dataset_prefix}.png')"
98+
]
99+
}
100+
],
101+
"metadata": {
102+
"kernelspec": {
103+
"display_name": "coderdata",
104+
"language": "python",
105+
"name": "python3"
106+
},
107+
"language_info": {
108+
"codemirror_mode": {
109+
"name": "ipython",
110+
"version": 3
111+
},
112+
"file_extension": ".py",
113+
"mimetype": "text/x-python",
114+
"name": "python",
115+
"nbconvert_exporter": "python",
116+
"pygments_lexer": "ipython3",
117+
"version": "3.12.6"
118+
}
119+
},
120+
"nbformat": 4,
121+
"nbformat_minor": 2
122+
}

0 commit comments

Comments
 (0)