@@ -209,12 +209,12 @@ def process_datasets(args):
209209 #-------------------------------------------------------------------
210210
211211
212- # split_data_sets(
213- # args=args,
214- # data_sets=data_sets,
215- # data_sets_info=data_sets_info,
216- # response_data=response_data
217- # )
212+ split_data_sets (
213+ args = args ,
214+ data_sets = data_sets ,
215+ data_sets_info = data_sets_info ,
216+ response_data = response_data
217+ )
218218
219219 #-------------------------------------------------------------------
220220 # getting common / reference gene symbols
@@ -481,6 +481,41 @@ def process_datasets(args):
481481 index = False ,
482482 )
483483
484+
485+ #-------------------------------------------------------------------
486+ # create Morgan fingerprint (ECFP4) table
487+ #-------------------------------------------------------------------
488+
489+ dfs_to_merge = {}
490+ for data_set in data_sets :
491+ if (data_sets [data_set ].experiments is not None
492+ and data_sets [data_set ].drug_descriptors is not None
493+ ):
494+ df_tmp = data_sets [data_set ].format (data_type = 'drug_descriptor' , shape = 'wide' )
495+ df_tmp = df_tmp ['morgan fingerprint' ]
496+ dfs_to_merge [data_set ] = df_tmp
497+
498+ concat_drugs = pd .concat (dfs_to_merge .values ())
499+ out_df = concat_drugs .reset_index ()
500+ out_df = out_df .drop_duplicates (subset = ['improve_drug_id' ], keep = 'first' )
501+ out_df = pd .concat ((out_df , out_df ['morgan fingerprint' ].astype (str ).apply (lambda x : pd .Series (list (x ))).astype (int ).add_prefix ('ecfp4.' )), axis = 1 )
502+ out_df = out_df .drop (['morgan fingerprint' ], axis = 1 )
503+ out_df .rename (
504+ columns = {'improve_drug_id' : 'improve_chem_id' },
505+ inplace = True ,
506+ )
507+
508+ outfile_path = args .WORKDIR .joinpath (
509+ "data_out" ,
510+ "x_data" ,
511+ "drug_ecfp4_nbits1024.tsv"
512+ )
513+ out_df .to_csv (
514+ path_or_buf = outfile_path ,
515+ sep = '\t ' ,
516+ index = False ,
517+ )
518+
484519 #-------------------------------------------------------------------
485520 # create mutation count table
486521 #-------------------------------------------------------------------
0 commit comments