Skip to content

Commit ceb330a

Browse files
committed
adding complete docstrings
For inclusion in coderdata docs site
1 parent 03d12e0 commit ceb330a

2 files changed

Lines changed: 108 additions & 2 deletions

File tree

coderdata/dataset/dataset.py

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ def __init__(
6464
6565
Parameters
6666
----------
67-
name : str
68-
The name of the dataset that is stored in the object
67+
name : str, optional
68+
The name of the dataset that is stored in the object, by default None
6969
transcriptomics : pd.DataFrame, optional
7070
The transcriptomics data that is stored in the object, by default None
7171
proteomics : pd.DataFrame, optional
@@ -386,6 +386,14 @@ def train_test_validate(
386386

387387

388388
def types(self) -> list:
389+
"""
390+
Get the data types available in the dataset.
391+
392+
Returns
393+
-------
394+
list
395+
A list of available data types (e.g., 'transcriptomics', 'proteomics').
396+
"""
389397
data_types = [
390398
'transcriptomics',
391399
'proteomics',
@@ -407,7 +415,18 @@ def types(self) -> list:
407415
return data_types_present
408416

409417
def save(self, path: Path) -> None:
418+
"""
419+
Save the dataset to a file by pickling the object in binary write mode.
410420
421+
Parameters
422+
----------
423+
path : Path
424+
The file path where the dataset will be saved.
425+
426+
Returns
427+
-------
428+
None
429+
"""
411430
with open(path, 'wb') as f_path:
412431
pickle.dump(self, file=f_path)
413432

@@ -563,6 +582,22 @@ def format(
563582
remove_na: bool=False,
564583
**kwargs: dict,
565584
):
585+
"""
586+
Format the dataset according to the specified type.
587+
588+
Parameters
589+
----------
590+
data_type : str
591+
The type of data to format (e.g., 'transcriptomics', 'mutations').
592+
remove_na : bool, optional
593+
Whether to remove rows with missing values, by default False.
594+
**kwargs : dict
595+
Additional arguments for customization.
596+
597+
Returns
598+
-------
599+
Formatted data based on the requested type.
600+
"""
566601

567602
if data_type == "transcriptomics":
568603
if data.transcriptomics is None:
@@ -759,6 +794,31 @@ def split_train_other(
759794
random_state: Optional[Union[int,RandomState]]=None,
760795
**kwargs: dict,
761796
):
797+
798+
"""
799+
Split the dataset into training and other subsets.
800+
801+
Parameters
802+
----------
803+
split_type : {'mixed-set', 'drug-blind', 'cancer-blind'}, optional
804+
The type of splitting to perform, by default 'mixed-set'.
805+
ratio : tuple[int, int], optional
806+
Ratio of train to other split sizes, by default (8, 2).
807+
stratify_by : str, optional
808+
Column to use for stratification, if any, by default None.
809+
balance : bool, optional
810+
Whether to balance the split data, by default False.
811+
random_state : int | RandomState | None, optional
812+
Random seed for reproducibility, by default None.
813+
**kwargs : dict
814+
Additional arguments for customization.
815+
816+
Returns
817+
-------
818+
TwoWaySplit
819+
The resulting datasets in training and other split.
820+
"""
821+
762822
train, other = _split_two_way(
763823
data=data,
764824
split_type=split_type,
@@ -785,6 +845,31 @@ def split_train_test_validate(
785845
random_state: Optional[Union[int,RandomState]]=None,
786846
**kwargs: dict,
787847
) -> Split:
848+
849+
"""
850+
Split the dataset into training, testing, and validation subsets.
851+
852+
Parameters
853+
----------
854+
split_type : {'mixed-set', 'drug-blind', 'cancer-blind'}, optional
855+
The type of splitting strategy to use, by default 'mixed-set'.
856+
ratio : tuple[int, int, int], optional
857+
Ratio for train, test, and validation sizes, by default (8,1,1).
858+
stratify_by : str, optional
859+
Column for stratification, if any, by default None.
860+
balance : bool, optional
861+
Whether to balance the splits, by default False.
862+
random_state : int | RandomState | None, optional
863+
Random seed for reproducible splits, by default None.
864+
**kwargs : dict
865+
Additional arguments for customization.
866+
867+
Returns
868+
-------
869+
Split
870+
A Split object with train, test, and validation datasets.
871+
"""
872+
788873
# Type checking split_type
789874
if split_type not in [
790875
'mixed-set', 'drug-blind', 'cancer-blind'

coderdata/utils/stats.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,27 @@ def plot_2d_respones_metric(
2222
metric2: str,
2323
**kwargs: dict
2424
) -> None:
25+
"""
26+
Plot a 2D histogram of two response metrics from a dataset.
27+
28+
Parameters
29+
----------
30+
data : cd.Dataset
31+
The dataset containing experiment data.
32+
metric1 : str
33+
The first response metric to plot on the y-axis.
34+
metric2 : str
35+
The second response metric to plot on the x-axis.
36+
**kwargs : dict
37+
Additional keyword arguments for customizing the plot:
38+
- `joint_bins` (int): Number of bins for the joint histogram. Default is 50.
39+
- `marginal_bins` (int): Number of bins for the marginal histograms. Default is 50.
40+
41+
Returns
42+
-------
43+
None
44+
Displays the 2D histogram plot.
45+
"""
2546
2647
data_plot = _prepare_2d_hist_data(
2748
data=data.experiments,

0 commit comments

Comments
 (0)