Skip to content

Commit 314cdf9

Browse files
committed
clean up
1 parent b028af9 commit 314cdf9

1 file changed

Lines changed: 21 additions & 46 deletions

File tree

coderdata/dataset/dataset.py

Lines changed: 21 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -39,41 +39,22 @@ class Split:
3939

4040
class Dataset:
4141

42-
data_format_params = {
43-
"samples": (
44-
"improve_sample_id", "cancer_type", "model_type", "common_name",
45-
"other_id", "other_names", "id_source", "species"
46-
),
47-
"transcriptomics": (
48-
"improve_sample_id", "entrez_id", "transcriptomics"
49-
),
50-
"proteomics": ("improve_sample_id", "entrez_id", "proteomics"),
51-
"mutations": ("improve_sample_id", "entrez_id", "mutation"),
52-
"copy_number": ("improve_sample_id", "entrez_id", "copy_number"),
53-
"methylation": ("improve_sample_id", "entrez_id", "methylation"),
54-
"experiments": (
55-
"improve_sample_id", "improve_drug_id", "dose_response_value"
56-
),
57-
"drugs": ("improve_drug_id", "chem_name", "isoSMILES"),
58-
"genes": ("entrez_id", "gene_symbol", "other_id")
59-
}
60-
6142
def __init__(
6243
self,
63-
name: str=None,
64-
transcriptomics: pd.DataFrame=None,
65-
proteomics: pd.DataFrame=None,
66-
mutations: pd.DataFrame=None,
67-
copy_number: pd.DataFrame=None,
68-
samples: pd.DataFrame=None,
69-
drugs: pd.DataFrame=None,
70-
drug_descriptors: pd.DataFrame=None,
71-
mirna: pd.DataFrame=None,
72-
experiments: pd.DataFrame=None,
73-
methylation: pd.DataFrame=None,
74-
metabolomics: pd.DataFrame=None,
75-
genes: pd.DataFrame=None,
76-
combinations: pd.DataFrame=None,
44+
name: Optional[str]=None,
45+
transcriptomics: Optional[pd.DataFrame]=None,
46+
proteomics: Optional[pd.DataFrame]=None,
47+
mutations: Optional[pd.DataFrame]=None,
48+
copy_number: Optional[pd.DataFrame]=None,
49+
samples: Optional[pd.DataFrame]=None,
50+
drugs: Optional[pd.DataFrame]=None,
51+
drug_descriptors: Optional[pd.DataFrame]=None,
52+
mirna: Optional[pd.DataFrame]=None,
53+
experiments: Optional[pd.DataFrame]=None,
54+
methylation: Optional[pd.DataFrame]=None,
55+
metabolomics: Optional[pd.DataFrame]=None,
56+
genes: Optional[pd.DataFrame]=None,
57+
combinations: Optional[pd.DataFrame]=None,
7758
):
7859
"""
7960
Load datasets of a specific type into predefined attributes of this class instance.
@@ -131,12 +112,6 @@ def __init__(
131112
# getters / setters & deleters
132113
# ----------------------------
133114

134-
135-
@property
136-
def data_format_params(self):
137-
return self._data_format_params
138-
139-
140115
@property
141116
def name(self):
142117
return self._name
@@ -526,6 +501,7 @@ def load(
526501
dataset = pickle.load(file=file)
527502
print("DONE", file=sys.stderr)
528503
return dataset
504+
raise FileNotFoundError("No suitable pickle file found.")
529505

530506

531507

@@ -654,7 +630,6 @@ def format(
654630
index='improve_drug_id',
655631
columns='improve_sample_id'
656632
)
657-
return ret
658633

659634
elif data_type == "combinations":
660635
raise NotImplementedError(
@@ -771,7 +746,7 @@ def split_train_test_validate(
771746
train, other = _split_two_way(
772747
data=data,
773748
split_type=split_type,
774-
ratio=[ratio[0], ratio[1] + ratio[2]],
749+
ratio=(ratio[0], ratio[1] + ratio[2]),
775750
stratify_by=stratify_by,
776751
balance=balance,
777752
random_state=random_state,
@@ -781,7 +756,7 @@ def split_train_test_validate(
781756
test, val = _split_two_way(
782757
data=other,
783758
split_type=split_type,
784-
ratio=[ratio[1], ratio[2]],
759+
ratio=(ratio[1], ratio[2]),
785760
stratify_by=stratify_by,
786761
balance=balance,
787762
random_state=random_state,
@@ -993,10 +968,10 @@ def _filter(data: Dataset, split: pd.DataFrame) -> Dataset:
993968
return data_ret
994969

995970
def _balance_data(
996-
data: pd.Dataframe,
971+
data: pd.DataFrame,
997972
random_state: Optional[Union[int,RandomState]]=None,
998973
# oversample: bool=False,
999-
) -> pd.Dataframe:
974+
) -> pd.DataFrame:
1000975
tmp = deepcopy(data)
1001976
counts = tmp.value_counts('split_class')
1002977
ret_df = (
@@ -1012,7 +987,7 @@ def _create_classes(
1012987
metric: str,
1013988
num_classes: int=2,
1014989
quantiles: bool=True,
1015-
thresh: float=None,
990+
thresh: Optional[float]=None,
1016991
) -> pd.DataFrame:
1017992
"""
1018993
Helper function that bins experiment data into a number of defined
@@ -1101,7 +1076,7 @@ def _split_two_way(
11011076
split_type: Literal[
11021077
'mixed-set', 'drug-blind', 'cancer-blind'
11031078
]='mixed-set',
1104-
ratio: tuple[int, int, int]=(8,2),
1079+
ratio: tuple[int, int]=(8,2),
11051080
balance: bool=False,
11061081
stratify_by: Optional[str]=None,
11071082
random_state: Optional[Union[int,RandomState]]=None,

0 commit comments

Comments
 (0)