@@ -39,41 +39,22 @@ class Split:
3939
4040class Dataset :
4141
42- data_format_params = {
43- "samples" : (
44- "improve_sample_id" , "cancer_type" , "model_type" , "common_name" ,
45- "other_id" , "other_names" , "id_source" , "species"
46- ),
47- "transcriptomics" : (
48- "improve_sample_id" , "entrez_id" , "transcriptomics"
49- ),
50- "proteomics" : ("improve_sample_id" , "entrez_id" , "proteomics" ),
51- "mutations" : ("improve_sample_id" , "entrez_id" , "mutation" ),
52- "copy_number" : ("improve_sample_id" , "entrez_id" , "copy_number" ),
53- "methylation" : ("improve_sample_id" , "entrez_id" , "methylation" ),
54- "experiments" : (
55- "improve_sample_id" , "improve_drug_id" , "dose_response_value"
56- ),
57- "drugs" : ("improve_drug_id" , "chem_name" , "isoSMILES" ),
58- "genes" : ("entrez_id" , "gene_symbol" , "other_id" )
59- }
60-
6142 def __init__ (
6243 self ,
63- name : str = None ,
64- transcriptomics : pd .DataFrame = None ,
65- proteomics : pd .DataFrame = None ,
66- mutations : pd .DataFrame = None ,
67- copy_number : pd .DataFrame = None ,
68- samples : pd .DataFrame = None ,
69- drugs : pd .DataFrame = None ,
70- drug_descriptors : pd .DataFrame = None ,
71- mirna : pd .DataFrame = None ,
72- experiments : pd .DataFrame = None ,
73- methylation : pd .DataFrame = None ,
74- metabolomics : pd .DataFrame = None ,
75- genes : pd .DataFrame = None ,
76- combinations : pd .DataFrame = None ,
44+ name : Optional [ str ] = None ,
45+ transcriptomics : Optional [ pd .DataFrame ] = None ,
46+ proteomics : Optional [ pd .DataFrame ] = None ,
47+ mutations : Optional [ pd .DataFrame ] = None ,
48+ copy_number : Optional [ pd .DataFrame ] = None ,
49+ samples : Optional [ pd .DataFrame ] = None ,
50+ drugs : Optional [ pd .DataFrame ] = None ,
51+ drug_descriptors : Optional [ pd .DataFrame ] = None ,
52+ mirna : Optional [ pd .DataFrame ] = None ,
53+ experiments : Optional [ pd .DataFrame ] = None ,
54+ methylation : Optional [ pd .DataFrame ] = None ,
55+ metabolomics : Optional [ pd .DataFrame ] = None ,
56+ genes : Optional [ pd .DataFrame ] = None ,
57+ combinations : Optional [ pd .DataFrame ] = None ,
7758 ):
7859 """
7960 Load datasets of a specific type into predefined attributes of this class instance.
@@ -131,12 +112,6 @@ def __init__(
131112 # getters / setters & deleters
132113 # ----------------------------
133114
134-
135- @property
136- def data_format_params (self ):
137- return self ._data_format_params
138-
139-
140115 @property
141116 def name (self ):
142117 return self ._name
@@ -526,6 +501,7 @@ def load(
526501 dataset = pickle .load (file = file )
527502 print ("DONE" , file = sys .stderr )
528503 return dataset
504+ raise FileNotFoundError ("No suitable pickle file found." )
529505
530506
531507
@@ -654,7 +630,6 @@ def format(
654630 index = 'improve_drug_id' ,
655631 columns = 'improve_sample_id'
656632 )
657- return ret
658633
659634 elif data_type == "combinations" :
660635 raise NotImplementedError (
@@ -771,7 +746,7 @@ def split_train_test_validate(
771746 train , other = _split_two_way (
772747 data = data ,
773748 split_type = split_type ,
774- ratio = [ ratio [0 ], ratio [1 ] + ratio [2 ]] ,
749+ ratio = ( ratio [0 ], ratio [1 ] + ratio [2 ]) ,
775750 stratify_by = stratify_by ,
776751 balance = balance ,
777752 random_state = random_state ,
@@ -781,7 +756,7 @@ def split_train_test_validate(
781756 test , val = _split_two_way (
782757 data = other ,
783758 split_type = split_type ,
784- ratio = [ ratio [1 ], ratio [2 ]] ,
759+ ratio = ( ratio [1 ], ratio [2 ]) ,
785760 stratify_by = stratify_by ,
786761 balance = balance ,
787762 random_state = random_state ,
@@ -993,10 +968,10 @@ def _filter(data: Dataset, split: pd.DataFrame) -> Dataset:
993968 return data_ret
994969
995970def _balance_data (
996- data : pd .Dataframe ,
971+ data : pd .DataFrame ,
997972 random_state : Optional [Union [int ,RandomState ]]= None ,
998973 # oversample: bool=False,
999- ) -> pd .Dataframe :
974+ ) -> pd .DataFrame :
1000975 tmp = deepcopy (data )
1001976 counts = tmp .value_counts ('split_class' )
1002977 ret_df = (
@@ -1012,7 +987,7 @@ def _create_classes(
1012987 metric : str ,
1013988 num_classes : int = 2 ,
1014989 quantiles : bool = True ,
1015- thresh : float = None ,
990+ thresh : Optional [ float ] = None ,
1016991 ) -> pd .DataFrame :
1017992 """
1018993 Helper function that bins experiment data into a number of defined
@@ -1101,7 +1076,7 @@ def _split_two_way(
11011076 split_type : Literal [
11021077 'mixed-set' , 'drug-blind' , 'cancer-blind'
11031078 ]= 'mixed-set' ,
1104- ratio : tuple [int , int , int ]= (8 ,2 ),
1079+ ratio : tuple [int , int ]= (8 ,2 ),
11051080 balance : bool = False ,
11061081 stratify_by : Optional [str ]= None ,
11071082 random_state : Optional [Union [int ,RandomState ]]= None ,
0 commit comments