@@ -439,6 +439,41 @@ def process_datasets(args):
439439 )
440440
441441
#-------------------------------------------------------------------
# create drug_info table
#-------------------------------------------------------------------
#
# Stack the drug tables of every data set that has both experiments
# and drugs, reshape them to the drug_info column layout and write
# the result to <WORKDIR>/data_out/x_data/drug_info.tsv.

# Only data sets with both experiments and drugs contribute rows.
# No copy is needed here: pd.concat returns a new frame and the
# source tables are never modified below.
dfs_to_merge = {
    name: data_sets[name].drugs
    for name in data_sets
    if data_sets[name].experiments is not None
    and data_sets[name].drugs is not None
}

if dfs_to_merge:
    concat_drugs = pd.concat(dfs_to_merge.values())
else:
    # pd.concat raises ValueError on an empty sequence; fall back to
    # an empty frame so that a header-only table is still written.
    concat_drugs = pd.DataFrame(
        columns=['improve_drug_id', 'canSMILES', 'chem_name', 'pubchem_id']
    )

# Selecting the output columns already discards everything else
# (formula, weight, InChIKey, ...), so no explicit drop is required.
out_df = (
    concat_drugs
    .assign(
        DrugID=concat_drugs['improve_drug_id'],
        SMILES=concat_drugs['canSMILES'],
        CAS_ID=None,
    )[
        [
            'DrugID',
            'SMILES',
            'canSMILES',
            'chem_name',
            'pubchem_id',
            'CAS_ID',
            'improve_drug_id',
        ]
    ]
    .rename(
        columns={
            'chem_name': 'NAME',
            'pubchem_id': 'PUBCHEM_ID',
            'improve_drug_id': 'improve_chem_id',
        }
    )
)

# Missing, non-numeric and zero PubChem IDs all become <NA>. The
# nullable Int64 dtype keeps the remaining IDs as integers (written
# as "1234", not "1234.0"). Series.replace(0, None) is avoided on
# purpose: depending on the pandas version it either forward-fills
# from the previous row or turns the column into object dtype.
pubchem_ids = pd.to_numeric(out_df['PUBCHEM_ID'], errors='coerce')
out_df['PUBCHEM_ID'] = pubchem_ids.where(pubchem_ids != 0).astype('Int64')

outfile_path = args.WORKDIR.joinpath(
    "data_out",
    "x_data",
    "drug_info.tsv",
)
out_df.to_csv(
    path_or_buf=outfile_path,
    sep='\t',  # the delimiter must be a single character
    index=False,
)
475+
476+
442477 #-------------------------------------------------------------------
443478 # create mordred table
444479 #-------------------------------------------------------------------
0 commit comments