Skip to content

Commit e8d04b0

Browse files
committed
added routine for <DATASET>_all.txt index files
1 parent 34c24bc commit e8d04b0

1 file changed

Lines changed: 29 additions & 3 deletions

File tree

scripts/prepare_data_for_improve.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,35 @@ def process_datasets(args):
153153

154154
for data_set in data_sets_info.keys():
155155
if data_sets[data_set].experiments is not None:
156+
157+
# getting "<DATASET>_all.txt"
158+
drug_response_rows = (
159+
data_sets[data_set]  # was hard-coded data_sets['mpnst']: inside the loop over all datasets, this must index the current data_set or every <DATASET>_all.txt is derived from mpnst's experiments
160+
.experiments[
161+
['improve_sample_id', 'improve_drug_id', "time", "study"]
162+
]
163+
.drop_duplicates()
164+
)
165+
drug_response_rows.rename(
166+
columns={'improve_drug_id': 'improve_chem_id'},
167+
inplace=True,
168+
)
169+
row_nums = pd.merge(
170+
response_data,
171+
drug_response_rows,
172+
how='inner',
173+
on=['improve_sample_id', 'improve_chem_id', "time", "study"]
174+
)
175+
outfile_path = splits_folder.joinpath(f"{data_set}_all.txt")
176+
row_nums.to_csv(
177+
path_or_buf=outfile_path,
178+
columns=['index'],
179+
index=False,
180+
header=False
181+
)
182+
183+
# generation of the actual splits
184+
156185
splits = {}
157186
for i in range(0, args.NUM_SPLITS):
158187
splits[i] = data_sets[data_set].train_test_validate(
@@ -249,9 +278,6 @@ def process_datasets(args):
249278
)
250279

251280

252-
# look up the row ids for all data items of each data source to
253-
# create "<STUDY>_all.txt in /splits"
254-
255281

256282
# join the "meta data tables" like copynumber etc.
257283

0 commit comments

Comments
 (0)