Skip to content

Commit ac06be8

Browse files
changes after testing functions
1 parent 2ebfa38 commit ac06be8

2 files changed

Lines changed: 69 additions & 10 deletions

File tree

build/liverpdo/04-experiments-liverpdo.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ def download_experiments_data(synID:str , save_path:str = None, synToken:str = N
5757

5858

5959

60+
### Parse Data Function
6061
def parse_experiments_excel_sheets(first_file_path, second_file_path):
6162
# read in the excel files
62-
first_exp_excel = pd.ExcelFile(open(first_file_path, 'rb'))
63+
first_exp_excel = pd.ExcelFile(open(first_experiments_path, 'rb'))
6364
first_experiments_dict = pd.read_excel(first_exp_excel, sheet_name=None, header=None)
64-
rest_exp_excel = pd.ExcelFile(open(second_file_path, 'rb'))
65+
rest_exp_excel = pd.ExcelFile(open(rest_experiments_path, 'rb'))
6566
rest_experiments_dict = pd.read_excel(rest_exp_excel, sheet_name=None, header=None)
66-
# use for loops to iterate through the dictionaries, melt the df's into longer df's instead of matrices, and then concat
6767
list_of_exp_excels = [first_experiments_dict,rest_experiments_dict]
6868
full_df_list = []
6969
for dictionary in list_of_exp_excels:
@@ -76,13 +76,30 @@ def parse_experiments_excel_sheets(first_file_path, second_file_path):
7676
conc_indexes = conc_indexes + [one_sample_df.index[-1]+1]
7777
for index in range(0,(len(conc_indexes)-1)):
7878
one_conc_df = one_sample_df.loc[conc_indexes[index]:(conc_indexes[(index+1)]-1)]
79+
# print("length before melt is:", len(one_conc_df))
80+
# print("end index is ",(conc_indexes[(index+1)]-1))
7981
one_conc_df.columns = one_conc_df.iloc[0]
8082
one_conc_df = one_conc_df[1:]
8183
one_conc_df = pd.melt(one_conc_df, id_vars=['concentration'], value_vars=one_conc_df.columns[one_conc_df.columns != 'concentration'])
82-
one_conc_df = one_conc_df.rename(columns={"concentration":"drug_id",one_conc_df.columns[1]:"concentration","value":"count"})
84+
one_conc_df = one_conc_df.rename(columns={"concentration":"drug_id",one_conc_df.columns[1]:"DOSE","value":"count"})
85+
one_conc_df = one_conc_df.astype({"DOSE": 'float'})
86+
one_conc_df = one_conc_df.reset_index(drop=True)
87+
# now convert all counts to growth rates
88+
for drug in one_conc_df['drug_id'].unique():
89+
# print("the drug name is",drug)
90+
# print("the mean of the 0's is ",one_conc_df[(one_conc_df['drug_id'] == drug) & (one_conc_df['DOSE'] == 0)]['count'].mean())
91+
mean_of_zeros = one_conc_df[(one_conc_df['drug_id'] == drug) & (one_conc_df['DOSE'] == 0)]['count'].mean()
92+
one_conc_df.loc[one_conc_df['drug_id'] == drug, 'GROWTH'] = (one_conc_df[(one_conc_df['drug_id'] == drug)]['count']/mean_of_zeros)*100
93+
# print("sample is ", experiment_key)
94+
# print("index is ",index)
95+
# print("length of df is", len(one_conc_df))
96+
# print("number of unique drugs is", one_conc_df['drug_id'].nunique())
8397
list_of_dfs.append(one_conc_df)
98+
# print(list_of_dfs)
8499
elongated_df = pd.concat(list_of_dfs)
85100
elongated_df['sample_name'] = experiment_key
101+
# print(experiment_key)
102+
# print(elongated_df['drug_id'].nunique())
86103
list_of_finished_dfs.append(elongated_df)
87104
full_experiments_df = pd.concat(list_of_finished_dfs)
88105
full_df_list.append(full_experiments_df)
@@ -93,7 +110,7 @@ def parse_experiments_excel_sheets(first_file_path, second_file_path):
93110
def merge_improve_samples_drugs(experiment_data:pd.DataFrame, samples_data_path:str, drugs_info_path:str, improve_drugs_path:str):
94111
# read in data
95112
experiments_df = experiment_data
96-
improve_sample_df = pd.read_csv(samples_data_path, sep='\t')
113+
improve_sample_df = pd.read_csv(samples_data_path)
97114
improve_drug_df = pd.read_csv(improve_drugs_path, sep='\t')
98115
druginfo_df = pd.read_excel(drugs_info_path)
99116
# merging improve drug id's
@@ -113,4 +130,46 @@ def merge_improve_samples_drugs(experiment_data:pd.DataFrame, samples_data_path:
113130
all_merged = all_merged[['study','time','DOSE','GROWTH','Drug','improve_sample_id','time_unit','source']]
114131
all_merged = all_merged.dropna()
115132

116-
return(all_merged)
133+
return(all_merged)
134+
135+
136+
137+
if __name__ == "__main__":
    # Command-line entry point: optionally downloads the two experiments
    # workbooks from Synapse, then parses and merges them into the tsv that
    # fit_curve.py consumes (see build_exp.sh).
    parser = argparse.ArgumentParser(description='Download and build liverpdo experiments data.')

    # arguments for what data to process
    parser.add_argument('-D', '--Download', action='store_true', default=False, help='Download experiments data.')
    parser.add_argument('-t', '--Token', type=str, default=None, help='Synapse Token')
    parser.add_argument('-E', '--Experiment', action='store_true', default=False, help='Create experiments data.')
    parser.add_argument('-s', '--Samples', type=str, default=None, help='Path to samples file.')
    parser.add_argument('-d', '--Drugs', type=str, default=None, help='Path to drugs file')

    args = parser.parse_args()

    ###########################

    if args.Download:
        if args.Token is None:
            # Exit nonzero so build_exp.sh (set -euo pipefail) halts the build;
            # a bare exit() would return status 0 and the failure would be missed.
            print("No Synapse download token was provided. Cannot download data.")
            raise SystemExit(1)
        print("Downloading Files from Synapse.")
        # download experiments data from synapse, which are split into 2 excel files
        first_experiments_path = download_experiments_data(synID="syn66401301", save_path="/tmp/", synToken=args.Token)
        rest_experiments_path = download_experiments_data(synID="syn66401302", save_path="/tmp/", synToken=args.Token)

    if args.Experiment:
        # Guard clauses: every error path exits nonzero so failures propagate.
        if not args.Download:
            # The parse step reads the paths produced by the download branch
            # above; without --Download they are unbound (previously a NameError).
            print("Experiment generation requires --Download. Cannot generate experiment data.")
            raise SystemExit(1)
        if args.Samples is None:
            print("No path to samples file detected. Cannot generate experiment data.")
            raise SystemExit(1)
        if args.Drugs is None:
            print("No path to drugs file detected. Cannot generate experiment data.")
            raise SystemExit(1)
        print("Parsing experiments excel sheets")
        parsed_experiments_data = parse_experiments_excel_sheets(first_experiments_path, rest_experiments_path)
        print("Generating experiments data.")
        # NOTE(review): drugs_info_path is hard-coded to a file assumed to be
        # staged in /tmp by an earlier build step — confirm against build_exp.sh.
        experiments_df = merge_improve_samples_drugs(experiment_data=parsed_experiments_data, samples_data_path=args.Samples, improve_drugs_path=args.Drugs, drugs_info_path="/tmp/4_Drug_information.xlsx")
        output_path = "/tmp/liverpdo_experiments_for_curve_fitting.tsv"
        print("Experiments data successfully generated. Saving tsv to {}".format(output_path))
        experiments_df.to_csv(output_path, sep='\t')

build/liverpdo/build_exp.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ set -euo pipefail
33
trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR
44

55
# running the drug python script
6-
echo "Running 04-experiments-crcpdo.py with token, samples file $1 and drugs file $2."
7-
python3 04-experiments-crcpdo.py --Download --Experiment --Token $SYNAPSE_AUTH_TOKEN --Samples $1 --Drugs $2
6+
echo "Running 04-experiments-liverpdo.py with token, samples file $1 and drugs file $2."
7+
python3 04-experiments-liverpdo.py --Download --Experiment --Token $SYNAPSE_AUTH_TOKEN --Samples $1 --Drugs $2
88

99
# running the drug descriptor python script
10-
python3 fit_curve.py --input /tmp/crcpdo_experiments_for_curve_fitting.tsv --output /tmp/crcpdo_experiments.tsv
10+
python3 fit_curve.py --input /tmp/liverpdo_experiments_for_curve_fitting.tsv --output /tmp/liverpdo_experiments.tsv
1111

1212
# change name of script
13-
mv /tmp/crcpdo_experiments.tsv.0 /tmp/crcpdo_experiments.tsv
13+
mv /tmp/liverpdo_experiments.tsv.0 /tmp/liverpdo_experiments.tsv

0 commit comments

Comments
 (0)