Skip to content

Commit 1c0c05f

Browse files
committed
cleanup / removal of outdated functions
1 parent dbe124d commit 1c0c05f

1 file changed

Lines changed: 0 additions & 135 deletions

File tree

coderdata/utils/stats.py

Lines changed: 0 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -135,48 +135,6 @@ def plot_response_metric(
135135
p.set_title(title_)
136136

137137

138-
def split_experiments_by_study(data: cd.Dataset) -> dict:
    """
    Break a CoderData object apart into one CoderData object per study.

    The rows of the ``.experiments`` table are grouped by their `study`
    value. For every group the unique ``improve_sample_id`` and
    ``improve_drug_id`` values are collected and handed to ``_filter``
    together with the study name to build the per-study object.

    Parameters
    ----------
    data : cd.Dataset
        The CoderData object holding the data set in memory. Its
        ``.experiments`` table must provide the columns `study`,
        `improve_sample_id` and `improve_drug_id`.

    Returns
    -------
    dict
        A dictionary dict[study, data]. Each key `study` is a study
        name found in the ``.experiments`` table, and each value `data`
        is the CoderData object returned by ``_filter`` for that study.
    """

    def _unique_ids(frame, column):
        # distinct ids of one study group as a plain python list
        return list(np.unique(frame[column].values))

    # one filtered CoderData object per group of the 'study' column
    return {
        study_name: _filter(
            data=data,
            sample_ids=_unique_ids(study_rows, 'improve_sample_id'),
            drug_ids=_unique_ids(study_rows, 'improve_drug_id'),
            study=study_name,
        )
        for study_name, study_rows in data.experiments.groupby('study')
    }
178-
179-
180138
def summarize_response_metric(data: cd.Dataset) -> pd.DataFrame:
181139
"""
182140
Helper function to extract basic statistics for the `experiments`
@@ -224,99 +182,6 @@ def summarize_response_metric(data: cd.Dataset) -> pd.DataFrame:
224182
return df_ret
225183

226184

227-
def _filter(
    data: cd.Dataset,
    sample_ids: list,
    drug_ids: list,
    study: str = None,
) -> cd.Dataset:
    """
    Helper function to filter down a CoderData object into an
    independent, more concise CoderData object for further processing.
    This can be either splitting a dataset according to the different
    drug response studies (e.g. the broad_sanger dataset) or extracting
    small subsets (e.g. training / testing splits for machine learning).

    Parameters
    ----------
    data : cd.Dataset
        The full CoderData object to filter. It is not modified; the
        function works on a deep copy.
    sample_ids : list
        A list of improve_sample_id[s] that the CoderData object should
        be filtered to
    drug_ids : list
        A list of improve_drug_id[s] that the CoderData object should
        be filtered to
    study : str, default = None
        The drug response study that the ``experiments`` table should
        be filtered to. If ``None`` (the default) the ``experiments``
        table is left untouched.

    Returns
    -------
    cd.Dataset
        The filtered copy of the CoderData object

    Notes
    -----

    The individual tables of the CoderData object are filtered using
    either the improve_sample_id or the improve_drug_id.

    - copy_number -> reduce based on ``improve_sample_id``
    - drugs -> reduce based on ``improve_drug_id``
    - experiments -> reduce based on ``study`` (only if `study` is
      given)
    - mutations -> reduce based on ``improve_sample_id``
    - proteomics -> reduce based on ``improve_sample_id``
    - samples -> reduce based on ``improve_sample_id``
    - transcriptomics -> reduce based on ``improve_sample_id``

    """

    # creating a deep copy of the CoderData object such that any
    # further operations on the object are not changing the original
    # object / data
    data_ret = deepcopy(data)

    # table attribute -> (id column to filter on, ids to keep)
    id_filters = {
        'copy_number': ('improve_sample_id', sample_ids),
        'drugs': ('improve_drug_id', drug_ids),
        'mutations': ('improve_sample_id', sample_ids),
        'proteomics': ('improve_sample_id', sample_ids),
        'samples': ('improve_sample_id', sample_ids),
        'transcriptomics': ('improve_sample_id', sample_ids),
    }

    # filtering each individual data type down to the requested
    # sample / drug ids; empty tables are skipped because they may not
    # carry the id column at all
    for table_name, (id_column, keep_ids) in id_filters.items():
        table = getattr(data_ret, table_name)
        if not table.empty:
            setattr(data_ret, table_name, table[table[id_column].isin(keep_ids)])

    # only restrict the experiments when a study was actually requested;
    # comparing the column against ``None`` would match no row and
    # silently wipe the whole table
    if study is not None and not data_ret.experiments.empty:
        data_ret.experiments = data_ret.experiments[
            data_ret.experiments['study'] == study
        ]
    # TODO: do we also need to split the gene table?

    return data_ret
318-
319-
320185
def _prepare_2d_hist_data(
321186
data: pd.DataFrame,
322187
metrics: list[str]=[

0 commit comments

Comments
 (0)