Final script for novartispdx, plus some changes to calc_pdx_metrics.py

RubyFore · RubyFore · commit a9164ca5397a · 2025-07-16T15:17:57.000-07:00
Changes to calc_pdx_metrics.py: added `--study` and `--source` command-line arguments so that the `study` and `source` columns are no longer hard-coded to the MPNST values. Wrapped the final processing of the combos dataset in an if statement, because errors arise when there is no drug data for the combination drugs. Moved writing the combinations file (`_combinations.tsv`, tab-separated) inside this if statement, so it is only written when combination data exists.
diff --git a/build/novartispdx/01-samples-novartispdx.py b/build/novartispdx/01-samples-novartispdx.py
@@ -44,7 +44,6 @@ def get_complete_novartispdx_sample_sheet(synObject):
     print("Logging into Synapse")
     PAT = args.token
     synObject = synapseclient.login(authToken=PAT)
-
     samplesheet = get_complete_novartispdx_sample_sheet(synObject)
 
     if (args.prevSamples):
diff --git a/build/novartispdx/04-experiments-novartispdx.py b/build/novartispdx/04-experiments-novartispdx.py
@@ -20,7 +20,7 @@ def get_novartis_pdx_experiments_file(synObject, samples_df):
     novartispdx_curvefile['treatment']=novartispdx_curvefile['treatment'].str.replace('untreated', 'control')
     novartispdx_curvefile['experiment'] = novartispdx_curvefile.groupby(['model_id']).ngroup()+1
     # remove triple combination(s)
-    novartispdx_curvefile = novartispdx_curvefile[~novartispdx_curvefile['treatment'].str.contains(r'\+.*\+')]
+    novartispdx_curvefile = novartispdx_curvefile[~novartispdx_curvefile['treatment'].str.contains(r'\+')]
     # remove dose information appended to some drugs in the treatment column and include in dose colum
     druganddose = novartispdx_curvefile['treatment'].str.split('-', expand=True)
     druganddose = druganddose.rename({0: 'treatment', 1:'dose'}, axis=1)
@@ -39,12 +39,9 @@ def get_novartis_pdx_experiments_file(synObject, samples_df):
     #merge on drug names done in calc_pdx_metrics.py
     #final_w_drugIDs = finaldf.merge(drug_df, how='left',right_on='chem_name', left_on="treatment")
     final_allIDs = nomissingcontrols.merge(samples_df, how='left', right_on='common_name', left_on='model_id') 
-    print(final_allIDs.head)
     final_allIDs = final_allIDs.drop('model_id', axis=1)
     finalDF = final_allIDs.rename({'improve_sample_id':'model_id'}, axis=1)
-    print(finalDF.head)
     finalcurvefile = finalDF[['model_id', 'time', 'volume', 'treatment', 'experiment', 'dose']]
-    print(finalcurvefile.head)
     return finalcurvefile
 
 
@@ -59,7 +56,6 @@ def get_novartis_pdx_experiments_file(synObject, samples_df):
     print("Logging into Synapse")
     PAT = args.token
     synObject = synapseclient.login(authToken=PAT)
-    #drug_df = pd.read_csv(args.drugfile, sep='\t')
     samples_df = pd.read_csv(args.curSampleFile)
     
     doseresponse_data = get_novartis_pdx_experiments_file(synObject, samples_df)
diff --git a/build/novartispdx/build_experiments.sh b/build/novartispdx/build_experiments.sh
@@ -2,4 +2,4 @@
 #python3 04-experiments-novartispdx.py --token $SYNAPSE_AUTH_TOKEN 
 
 python3 -m novartispdx.04-experiments-novartispdx --token $SYNAPSE_AUTH_TOKEN -o ~/Projects/CoderData/dev-environment/novartispdx/novartispdx_curvedata.tsv
-python3 utils/calc_pdx_metrics.py /tmp/novartispdx_curvedata.tsv --drugfile=/tmp/novartispdx_drugs.tsv --outprefix=/tmp/novartispdx
+python3 utils/calc_pdx_metrics.py /tmp/novartispdx_curvedata.tsv --drugfile=/tmp/novartispdx_drugs.tsv --outprefix=/tmp/novartispdx --study='Novartis PDX Gao etal 2015' --source='Synapse'
diff --git a/build/utils/calc_pdx_metrics.py b/build/utils/calc_pdx_metrics.py
@@ -184,12 +184,12 @@ def AUC(time, volume, time_normalize=True):
     dict: Dictionary containing the AUC value.
     """
     auc = trapz_auc(time, volume)
-    print('at line 187')
-    print(time.shape)
-    print(time.dtype)
-    print(np.max(time.astype(int)))
-    print('auc is : ')
-    print(auc)
+    #print('at line 187')
+    #print(time.shape)
+    #print(time.dtype)
+    #print(np.max(time.astype(int)))
+    #print('auc is : ')
+    #print(auc)
     if time_normalize:
         auc = auc/np.max(time)
     return {"metric": "auc", "value": auc, 'time':np.max(time)}
@@ -292,7 +292,7 @@ def lmm(time, volume, treatment, drug_name):
     #interaction_term = 'time:exp_type'
 #    if interaction_term in fit.params:
 #    time_coef_value = fit.params['time']
-    print(fit.params)
+    #print(fit.params)
     i_coef_value = fit.params['time:exp_type[T.'+drug_name+']']
     #i_coef_value = fit.params['time:exp_type['+drug_name+']']
    # else:
@@ -312,6 +312,8 @@ def main():
     parser.add_argument('curvefile')
     parser.add_argument('--drugfile')
     parser.add_argument('--outprefix',default='/tmp/')
+    parser.add_argument('--study')
+    parser.add_argument('--source')
     
     args = parser.parse_args()
     
@@ -325,22 +327,21 @@ def main():
     expsing = expsing.dropna()
     
     # source	improve_sample_id	improve_drug_id	study	time	time_unit	dose_response_metric	dose_response_value
-
-    combos[['drug1','drug2']]=combos.drug.str.split('+',expand=True)
-    print('COMBOS ARE: ')
-    print(combos[['drug1', 'drug2']])
-    combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
-
-    expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-    expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-    print(expcomb[['improve_drug_1', 'improve_drug_2']])
-    expcomb[['source']]='Synapse'
-    expcomb[['study']]='MPNST PDX in vivo'
-
-    expsing[['source']]='Synapse'
-    expsing[['study']]='MPNST PDX in vivo'
+    if combos.shape[0]> 0:
+        combos[['drug1','drug2']]=combos['drug'].str.split('+',expand=True)
+        
+        combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
+        
+        expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb[['source']]=args.source
+        expcomb[['study']]=args.study
+        expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+
+    expsing[['source']]=args.source
+    expsing[['study']]=args.study
     expsing.to_csv(args.outprefix+'_experiments.tsv',index=False, sep="\t")
-    expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+    #expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")