1616import seaborn as sns
1717
1818
def plot_2d_respones_metric(
        data: DatasetLoader,
        metric1: str,
        metric2: str,
        **kwargs: dict
        ) -> None:
    """
    Creates a joint plot (2D histogram with marginal histograms) that
    compares two dose response metrics of a dataset against each other.

    The values of both metrics are brought into a common table via
    ``prepare_2d_hist_data`` and then handed to `seaborn.jointplot`
    with ``kind="hist"``.

    Parameters
    ----------
    data : coderdata.DatasetLoader
        A full CoderData object of a dataset. Its ``experiments`` table
        is used as the source of the dose response values.
    metric1 : str
        Name of the dose response metric plotted on the y-axis.
    metric2 : str
        Name of the dose response metric plotted on the x-axis.
    **kwargs : dict, optional
        Additional keyword arguments that can be passed to the function
        - joint_bins : int, default=50 - number of bins of the central
          2D histogram
        - marginal_bins : int, default=50 - number of bins of the
          marginal histograms

    Returns
    -------
    None
    """

    # `dict.get` only accepts the fallback value positionally; passing
    # it as `default=...` raises a TypeError.
    joint_bins = kwargs.get('joint_bins', 50)
    marginal_bins = kwargs.get('marginal_bins', 50)

    # Requesting the same metric twice would make the merge inside the
    # helper emit suffixed duplicate columns, so duplicates are removed
    # while the order of the metrics is preserved.
    metrics = list(dict.fromkeys((metric1, metric2)))

    # The helper groups a DataFrame by 'dose_response_metric', so it
    # needs the experiments table rather than the loader object itself.
    data_plot = prepare_2d_hist_data(
        data=data.experiments,
        metrics=metrics,
    )

    sns.jointplot(
        data=data_plot,
        x=metric2,
        y=metric1,
        kind="hist",
        joint_kws=dict(bins=joint_bins),
        marginal_kws=dict(bins=marginal_bins),
    )
43+
def plot_response_metric(
        data: DatasetLoader,
        metric: str = 'auc',
        ax: Axes = None,
        **kwargs: dict
        ) -> None:
    """
    Creates a histogram detailing the distribution of dose response
    values for a given dose response metric.

    If used in conjunction with `matplotlib.pyplot.subplot` or
    `matplotlib.pyplot.subplots` and the axes object is passed to the
    function, the function populates the axes object with the generated
    plot.

    Parameters
    ----------
    data : coderdata.DatasetLoader
        A full CoderData object of a dataset
    metric : str, default='auc'
        A string that defines the response metric that should be plotted
    ax : matplotlib.axes.Axes, default=None
        An `Axes` object can be defined. This is useful if a multipanel
        subplot has been defined prior via `matplotlib.pyplot.subplots`.
        Passing the location of the axes to the function will then
        populate the subplot at the given location with the generated
        plot.
    **kwargs : dict, optional
        Additional keyword arguments that can be passed to the function
        - bins : int, default=10 - sets the number of bins; passed to
          `seaborn.histplot`
        - title : str, default=None - sets the title of the axes
        - kde : bool, default=False - adds a kernel density estimate
          plot into the histogram

    Returns
    -------
    None

    Notes
    -----
    The function calls ``seaborn.set_theme(palette='colorblind')``,
    which changes the plotting theme globally and not only for the
    generated histogram.

    Example
    -------
    In a Jupyter Notebook environment the following snippet can be used
    to display a histogram detailing the distribution of drug response
    AUC measures in the beataml dataset.

    >>> import coderdata as cd
    >>> beataml = cd.DataLoader('beataml')
    >>> cd.plot_response_metric(data=beataml, metric='auc', bins=10)

    For generating multipanel plots we can make use of matplotlib and
    the `ax` parameter of this function. Furthermore, other features /
    parameters of the created figure can be changed (e.g. the title of
    the figure via `suptitle()`). Finally it can be saved.

    >>> import coderdata as cd
    >>> import matplotlib.pyplot as plt
    >>> beataml = cd.DataLoader('beataml')
    >>> fig, axs = plt.subplots(ncols=2, figsize=(10, 5))
    >>> plot_response_metric(
    ...     data=beataml,
    ...     metric='auc',
    ...     bins=10,
    ...     ax=axs[0]
    ... )
    >>> plot_response_metric(
    ...     data=beataml,
    ...     metric='aac',
    ...     bins=10,
    ...     ax=axs[1]
    ... )
    >>> fig.set_layout_engine('tight')
    >>> fig.suptitle('Distribution of drug response values')
    >>> fig.savefig('figure.png')
    """

    # assigning values to variables based on **kwargs and defining
    # default values if not present in **kwargs
    bins_ = kwargs.get('bins', 10)
    title_ = kwargs.get('title', None)
    kde_ = kwargs.get('kde', False)

    # retrieving the data/values necessary to generate the figure
    x = (
        data.experiments                    # experiments DF of the dataset
        .groupby('dose_response_metric')    # one group per metric
        .get_group(metric)                  # rows of the requested metric
        ['dose_response_value']             # the values to be plotted
    )

    sns.set_theme(palette='colorblind')
    p = sns.histplot(data=x, kde=kde_, bins=bins_, ax=ax)
    p.set_xlabel(metric)
    p.set_title(title_)
138+
19139def split_experiments_by_study (data : DatasetLoader ) -> dict :
20140 """
21141 Splits the CoderData object into multiple smaller CoderData objects
@@ -58,6 +178,53 @@ def split_experiments_by_study(data: DatasetLoader) -> dict:
58178 return df_ret
59179
60180
def summarize_response_metric(data: DatasetLoader) -> pd.DataFrame:
    """
    Compute summary statistics of the dose response values stored in
    the `experiments` table of a CoderData object.

    The `dose_response_value` column is grouped by
    `dose_response_metric` and summarized with pandas' ``describe()``,
    which yields count, mean, standard deviation, minimum, the 25-, 50-
    and 75-percentile as well as the maximum for every metric present
    in `experiments`.

    Parameters
    ----------
    data : coderdata.DatasetLoader
        A full CoderData object of a dataset

    Returns
    -------
    pandas.DataFrame
        One row per dose response metric, one column per statistic.

    Example
    -------

    The Example assumes that a dataset with the prefix 'beataml' has
    been downloaded previously. See also ``coderdata.download()``

    >>> import coderdata as cd
    >>> beataml = cd.DataLoader('beataml')
    >>> summary_stats = summarize_response_metric(data=beataml)
    >>> summary_stats
                            count          mean           std
    dose_response_metric
    aac                   23378.0  3.028061e-01  1.821265e-01  ...
    auc                   23378.0  6.971939e-01  1.821265e-01  ...
    dss                   23378.0  3.218484e-01  5.733492e-01  ...
    ...                       ...           ...           ...  ...
    """
    experiments = data.experiments

    # one series of response values per dose response metric
    values_by_metric = (
        experiments.groupby('dose_response_metric')['dose_response_value']
    )

    # count, mean, std, min, quartiles and max for every metric
    return values_by_metric.describe()
226+
227+
61228def _filter (
62229 data : DatasetLoader ,
63230 sample_ids : list ,
@@ -150,143 +317,52 @@ def _filter(
150317
151318 return data_ret
152319
153- def summarize_response_metric (data : DatasetLoader ) -> pd .DataFrame :
154- """
155- Helper function to extract basic statistics for the `experiments`
156- object in a CoderData object. Uses `pandas.DataFrame.describe()`
157- internally to generate count, mean, standard deviation, minimum,
158- 25-, 50- and 75-percentile as well as maximum for
159- `dose_response_value` for each `dose_response_metric` present in
160- `experiments`.
161-
162- Parameters
163- ----------
164- data : coderdata.DatasetLoader
165- A full CoderData object of a dataset
166-
167- Returns
168- -------
169- pandas.DataFrame
170- A `pandas.DataFrame` containing basic statistics for each
171- dose response metric.
172-
173- Example
174- -------
175-
176- The Example assumes that a dataset with the prefix 'beataml' has
177- been downloaded previously. See also ``coderdata.download()``
178320
179- >>> import coderdata as cd
180- >>> beataml = cd.DataLoader('beataml')
181- >>> summary_stats = summarize_response_metric(data=beataml)
182- >>> summary_stats
183- count mean std
184- dose_response_metric
185- aac 23378.0 3.028061e-01 1.821265e-01 ...
186- auc 23378.0 6.971939e-01 1.821265e-01 ...
187- dss 23378.0 3.218484e-01 5.733492e-01 ...
188- ... ... ... ... ...
189- """
190- df_ret = (
191- data .experiments # get experiments DF
192- .groupby ('dose_response_metric' ) # grouping by metric
193- ['dose_response_value' ] # value to summarize
194- .describe () # get count, mean, std, etc.
195- )
196-
197- return df_ret
198-
199-
200- def plot_response_metric (
201- data : DatasetLoader ,
202- metric : str = 'auc' ,
203- ax : Axes = None ,
204- ** kwargs : dict
205- ) -> None :
206- """
207- Creates a histogram detailing the distribution of dose response
208- values for a given dose respones metric.
209-
210- If used in conjunction with `matplotlib.pyplot.subplot` or
211- `matplotlib.pyplot.subplots` and the axes object is passed to the
212- function, the function populates the axes object with the generated
213- plot.
def prepare_2d_hist_data(
        data: pd.DataFrame,
        metrics: list[str] = None,
        r2: float = None,
        ) -> pd.DataFrame:
    """
    Reshapes a long-format experiments table into a table with one
    column per dose response metric.

    The input is grouped by `dose_response_metric`. For every requested
    metric the `dose_response_value` column is renamed to the name of
    the metric and the resulting tables are joined on
    `improve_drug_id`, `improve_sample_id`, `time` and `study`. The
    join uses the default (inner) mode of `pandas.DataFrame.merge`, so
    only measurements available for all requested metrics are kept.

    Parameters
    ----------
    data : pandas.DataFrame
        The experiments table. The function reads the columns
        `dose_response_metric`, `dose_response_value`, `source`,
        `time_unit`, `improve_drug_id`, `improve_sample_id`, `time`
        and `study`.
    metrics : list of str, default=None
        The dose response metrics to extract. If ``None`` the metrics
        "aac", "auc", "dss", "fit_auc", "fit_ec50", "fit_ec50se",
        "fit_einf", "fit_hs", "fit_ic50" and "fit_r2" are used.
    r2 : float, default=None
        If given, only rows whose associated "fit_r2" value is strictly
        greater than `r2` are retained.

    Returns
    -------
    pandas.DataFrame
        The columns of the first metric's rows (except
        `dose_response_metric`, with `dose_response_value` renamed to
        the metric) plus one additional column per further metric.

    Raises
    ------
    ValueError
        If `metrics` is empty.
    KeyError
        Raised by pandas if a requested metric (or "fit_r2" when `r2`
        is given) is not present in `dose_response_metric`.
    """

    merge_keys = ["improve_drug_id", "improve_sample_id", "time", "study"]

    # The default is resolved here instead of in the signature to avoid
    # a mutable list that would be shared between calls.
    if metrics is None:
        metrics = [
            "aac", "auc", "dss",
            "fit_auc", "fit_ec50", "fit_ec50se",
            "fit_einf", "fit_hs", "fit_ic50",
            "fit_r2",
        ]
    else:
        metrics = list(metrics)

    if not metrics:
        raise ValueError(
            "`metrics` must contain at least one dose response metric"
        )

    metric_groups = data.groupby('dose_response_metric')

    def _metric_frame(metric, value_name, drop_columns):
        # `rename` and `drop` both return new frames, so neither the
        # caller's table nor the group itself is modified and no
        # explicit deep copy is required.
        return (
            metric_groups.get_group(metric)
            .rename(columns={"dose_response_value": value_name})
            .drop(columns=drop_columns)
        )

    # Columns that would otherwise be duplicated by every merge.
    redundant = ['source', 'time_unit', 'dose_response_metric']

    # Looked up before any merging so that a missing "fit_r2" group is
    # reported right away.
    r2_frame = None
    if r2 is not None:
        r2_frame = _metric_frame("fit_r2", "r2_thresh", redundant)

    # The first metric provides the base table and therefore keeps its
    # descriptive columns ('source', 'time_unit').
    d_ret = _metric_frame(metrics[0], metrics[0], ["dose_response_metric"])

    for metric in metrics[1:]:
        d_ret = d_ret.merge(
            _metric_frame(metric, metric, redundant),
            on=merge_keys,
        )

    if r2_frame is not None:
        d_ret = d_ret.merge(r2_frame, on=merge_keys)
        keep = d_ret["r2_thresh"] > float(r2)
        # A non in-place drop returns a fresh frame instead of mutating
        # the filtered slice.
        d_ret = d_ret.loc[keep].drop(columns=["r2_thresh"])

    return d_ret
0 commit comments