@@ -64,8 +64,8 @@ def __init__(
6464
6565 Parameters
6666 ----------
67- name : str
68- The name of the dataset that is stored in the object
67+ name : str, optional
68+ The name of the dataset that is stored in the object, by default None
6969 transcriptomics : pd.DataFrame, optional
7070 Transcriptomics data stored in the object, by default None
7171 proteomics : pd.DataFrame, optional
@@ -386,6 +386,14 @@ def train_test_validate(
386386
387387
388388 def types (self ) -> list :
389+ """
390+ Get the data types available in the dataset.
391+
392+ Returns
393+ -------
394+ list
395+ A list of available data types (e.g., 'transcriptomics', 'proteomics').
396+ """
389397 data_types = [
390398 'transcriptomics' ,
391399 'proteomics' ,
@@ -407,7 +415,18 @@ def types(self) -> list:
407415 return data_types_present
408416
409417 def save (self , path : Path ) -> None :
418+ """
419+ Save the dataset to a file.
410420
421+ Parameters
422+ ----------
423+ path : Path
424+ The file path where the dataset will be saved.
425+
426+ Returns
427+ -------
428+ None
429+ """
411430 with open (path , 'wb' ) as f_path :
412431 pickle .dump (self , file = f_path )
413432
@@ -563,6 +582,22 @@ def format(
563582 remove_na : bool = False ,
564583 ** kwargs : dict ,
565584 ):
585+ """
586+ Format the dataset according to the specified type.
587+
588+ Parameters
589+ ----------
590+ data_type : str
591+ The type of data to format (e.g., 'transcriptomics', 'mutations').
592+ remove_na : bool, optional
593+ Whether to remove rows with missing values, by default False.
594+ **kwargs : dict
595+ Additional arguments for customization.
596+
597+ Returns
598+ -------
599+ Formatted data based on the requested type.
600+ """
566601
567602 if data_type == "transcriptomics" :
568603 if data .transcriptomics is None :
@@ -759,6 +794,31 @@ def split_train_other(
759794 random_state : Optional [Union [int ,RandomState ]]= None ,
760795 ** kwargs : dict ,
761796 ):
797+
798+ """
799+ Split the dataset into training and other subsets.
800+
801+ Parameters
802+ ----------
803+ split_type : {'mixed-set', 'drug-blind', 'cancer-blind'}, optional
804+ The type of splitting to perform, by default 'mixed-set'.
805+ ratio : tuple[int, int], optional
806+ Ratio of train to other split sizes, by default (8, 2).
807+ stratify_by : str, optional
808+ Column to use for stratification, if any, by default None.
809+ balance : bool, optional
810+ Whether to balance the split data, by default False.
811+ random_state : int | RandomState | None, optional
812+ Random seed for reproducibility, by default None.
813+ **kwargs : dict
814+ Additional arguments for customization.
815+
816+ Returns
817+ -------
818+ TwoWaySplit
819+ A TwoWaySplit object with train and other datasets.
820+ """
821+
762822 train , other = _split_two_way (
763823 data = data ,
764824 split_type = split_type ,
@@ -785,6 +845,31 @@ def split_train_test_validate(
785845 random_state : Optional [Union [int ,RandomState ]]= None ,
786846 ** kwargs : dict ,
787847 ) -> Split :
848+
849+ """
850+ Split the dataset into training, testing, and validation subsets.
851+
852+ Parameters
853+ ----------
854+ split_type : {'mixed-set', 'drug-blind', 'cancer-blind'}, optional
855+ The type of splitting strategy to use, by default 'mixed-set'.
856+ ratio : tuple[int, int, int], optional
857+ Ratio for train, test, and validation sizes, by default (8,1,1).
858+ stratify_by : str, optional
859+ Column for stratification, if any, by default None.
860+ balance : bool, optional
861+ Whether to balance the splits, by default False.
862+ random_state : int | RandomState | None, optional
863+ Random seed for reproducible splits, by default None.
864+ **kwargs : dict
865+ Additional arguments for customization.
866+
867+ Returns
868+ -------
869+ Split
870+ A Split object with train, test, and validation datasets.
871+ """
872+
788873 # Type checking split_type
789874 if split_type not in [
790875 'mixed-set' , 'drug-blind' , 'cancer-blind'
0 commit comments