Fixed git issue. Adding pdx update from novartis and local change

jjacobson95 · jjacobson95 · commit fca76bdd811b · 2025-07-24T09:36:35.000-07:00
diff --git a/build/mpnst/03_get_experiments.R b/build/mpnst/03_get_experiments.R
@@ -240,12 +240,14 @@ fwrite(pdx_data, file.path("/tmp", paste0(out_prefix, "_pdx_curve_data.tsv")), s
 message("Wrote PDX curve data")
 
 system(sprintf(
-  "/opt/venv/bin/python calc_pdx_metrics.py %s --drugfile %s --outprefix %s",
- paste0("/tmp/", out_prefix, "_pdx_curve_data.tsv"),
- drugfile,
- paste0("/tmp/", out_prefix, "_pdx")
+  "/opt/venv/bin/python calc_pdx_metrics.py %s --drugfile %s --outprefix %s --source 'NF Data Portal' --study 'MPNST PDX'",
+  paste0("/tmp/", out_prefix, "_pdx_curve_data.tsv"),
+  drugfile,
+  paste0("/tmp/", out_prefix, "_pdx")
 ))
 
+
+
 message("Wrote PDX experiments to ", "/tmp/",  out_prefix, "_pdx_experiments.tsv and combinations")
 
 
diff --git a/build/utils/calc_pdx_metrics.py b/build/utils/calc_pdx_metrics.py
@@ -184,7 +184,12 @@ def AUC(time, volume, time_normalize=True):
     dict: Dictionary containing the AUC value.
     """
     auc = trapz_auc(time, volume)
-    #print(time)
+    #print('at line 187')
+    #print(time.shape)
+    #print(time.dtype)
+    #print(np.max(time.astype(int)))
+    #print('auc is : ')
+    #print(auc)
     if time_normalize:
         auc = auc/np.max(time)
     return {"metric": "auc", "value": auc, 'time':np.max(time)}
@@ -270,10 +275,15 @@ def lmm(time, volume, treatment, drug_name):
         raise ValueError("These columns must be present: 'model_id', 'volume', 'time', 'exp_type'")
     
     data['log_volume'] = np.log(data['volume'])
-    
+    #print('drug name is ' + drug_name)
+    data['exp_type'] = data['exp_type'].astype('category')
+    data['exp_type']=pd.Categorical(data['exp_type'],categories = ['control',drug_name], ordered=True)
+    #print(data)
+    #print(data['exp_type'].cat.categories)
     # Define the formula for mixed linear model
     formula = 'log_volume ~ time*exp_type'
     
+    #print(data['exp_type'].cat.categories)
     # Fit the model
     model = mixedlm(formula, data, groups=data['model_id'])
     fit = model.fit()
@@ -284,6 +294,7 @@ def lmm(time, volume, treatment, drug_name):
 #    time_coef_value = fit.params['time']
     #print(fit.params)
     i_coef_value = fit.params['time:exp_type[T.'+drug_name+']']
+    #i_coef_value = fit.params['time:exp_type['+drug_name+']']
    # else:
    #     coef_value = None  # Handle the case when the interaction term is not present
     
@@ -301,6 +312,8 @@ def main():
     parser.add_argument('curvefile')
     parser.add_argument('--drugfile')
     parser.add_argument('--outprefix',default='/tmp/')
+    parser.add_argument('--study')
+    parser.add_argument('--source')
     
     args = parser.parse_args()
     
@@ -314,20 +327,21 @@ def main():
     expsing = expsing.dropna()
     
     # source	improve_sample_id	improve_drug_id	study	time	time_unit	dose_response_metric	dose_response_value
-
-    combos[['drug1','drug2']]=combos.drug.str.split('+',expand=True)
-    combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
-
-    expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-    expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
-
-    expcomb[['source']]='Synapse'
-    expcomb[['study']]='MPNST PDX in vivo'
-
-    expsing[['source']]='Synapse'
-    expsing[['study']]='MPNST PDX in vivo'
+    if combos.shape[0]> 0:
+        combos[['drug1','drug2']]=combos['drug'].str.split('+',expand=True)
+        
+        combos = combos.rename({'metric':'drug_combination_metric','value':'drug_combination_value','sample':'improve_sample_id'},axis=1).dropna()
+        
+        expcomb = combos.rename({'drug1':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_1'},axis=1)[['improve_drug_1','drug2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb = expcomb.rename({'drug2':'chem_name'},axis=1).merge(drugs,on='chem_name',how='left').rename({'improve_drug_id':'improve_drug_2'},axis=1)[['improve_drug_1','improve_drug_2','improve_sample_id','time_unit','time','drug_combination_metric','drug_combination_value']]
+        expcomb[['source']]=args.source
+        expcomb[['study']]=args.study
+        expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+
+    expsing[['source']]=args.source
+    expsing[['study']]=args.study
     expsing.to_csv(args.outprefix+'_experiments.tsv',index=False, sep="\t")
-    expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
+    #expcomb.to_csv(args.outprefix+'_combinations.tsv',index=False, sep="\t")
     
 
     
@@ -341,21 +355,25 @@ def get_drug_stats(df, control='control'):
     for name, group in tqdm(groups):
         # Each group contains multiple treatments and a control
         drugs = set(group.treatment) - set([control])
-        print(name[0])
-        print(drugs)
+        #print('line 355')
+        #print(name[0])
+        #print(drugs)
         mod = list(set(group.model_id))[0]
 
         ctl_data = group[group.treatment == control]
         ctl_time = np.array(ctl_data.time)
         ctl_volume = np.array(ctl_data.volume)
-
+        if (ctl_volume.shape[0] < 2):
+            continue
         ctl_auc = AUC(ctl_time, ctl_volume)
         for d in drugs:
-            print(d)
-            d_data = group[group.treatment == d]
+            #print('is our drug a string or dict?')
+            #print(str(d))
+            d_data = group[group.treatment == str(d)]
             treat_time = np.array(d_data.time)
             treat_volume = np.array(d_data.volume)
-
+            if (treat_volume.shape[0] < 2):
+                continue
             # Get ABC for group
             treat_auc = AUC(treat_time, treat_volume)
             treat_abc = ABC(ctl_time, ctl_volume, treat_time, treat_volume)
@@ -368,6 +386,7 @@ def get_drug_stats(df, control='control'):
 
             #llm
             comb = pd.concat([ctl_data, d_data])
+            #print(comb)
             lmm_res = lmm(comb.time, comb.volume, comb.treatment, d)
             lmm_res.update({'sample': mod, 'drug': d, 'time': np.max(treat_time), 'time_unit': 'days'})
             if '+' in d: