Skip to content

Commit 0848d4a

Browse files
adding changes to mutations function
1 parent 62f61eb commit 0848d4a

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

build/novartispdx/02-omics-novartispdx.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def map_mutations_novPDX(mutation_data, improve_id_data, entrez_data):
231231

232232
# derive the 'mutation' column from 'Details' by keeping only the text before the first comma
233233
mutations_only_df['mutation'] = mutations_only_df['Details'].str.split(pat = ",", expand=True).iloc[:,0]
234+
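mutations_only_df[~mutations_only_df['mutation'].str.contains(r'\?')] # 1866 rows have a "?" in them and should be removed. NOTE(review): the filtered result is not assigned back to mutations_only_df, so this line does not actually drop those rows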
234235

235236
# create variant classifications with information that we have
236237
mutations_only_df['variant_classification'] = np.nan
@@ -246,11 +247,12 @@ def map_mutations_novPDX(mutation_data, improve_id_data, entrez_data):
246247
mutations_final = pd.merge(mutations_only_df, improve_id_data[['to_merge','improve_sample_id']], how = 'inner', left_on='Sample', right_on='to_merge')
247248

248249
# clean up column names and data types
250+
mutations_final = mutations_final.drop(columns={'Unnamed: 0'})
249251
mutations_final = mutations_final.rename(columns={'Entrez':'entrez_id'})
250252
mutations_final = mutations_final.drop(columns=['Sample','Gene','Category','Details','to_merge'])
251253
mutations_final['source'] = "CPDM"
252254
mutations_final['study'] = "novartispdx"
253-
mutations_final = mutations_final.astype({'entrez_id':'int'})
255+
mutations_final = mutations_final.astype({'entrez_id':'int', 'mutation':'str'})
254256

255257
return(mutations_final)
256258

0 commit comments

Comments
 (0)