@@ -205,79 +205,6 @@ def timeout_handler(signum, frame):
205205 should_continue = False
206206
207207
208- # def update_dataframe_and_write_tsv(unique_names, output_filename="drugs.tsv", ignore_chems="ignore_chems.txt",
209- # batch_size=1, isname=True, time_limit=48 * 60 * 60):
210- # """
211- # Updates the data frame with drug information and writes it to a TSV file.
212-
213- # Parameters:
214- # - unique_names (iterable): List of unique compound names or CIDs.
215- # - output_filename (str): File path to the output TSV file.
216- # - ignore_chems (str): File path to log ignored compounds.
217- # - batch_size (int): Number of compounds to process in each batch.
218- # - isname (bool): True if unique_names are names, False if they're CIDs.
219- # - time_limit (int): Time limit for the script in seconds. This is a remnant of the GitHub Action CI.
220-
221- # Returns:
222- # - None
223- # """
224- # global should_continue, existing_synonyms, existing_pubchemids
225- # signal.signal(signal.SIGALRM, timeout_handler)
226- # signal.alarm(time_limit)
227- # print(f'Starting with {len(unique_names)} unique drug names/IDs')
228-
229- # try:
230- # print(f'Reading existing data from {output_filename}')
231- # read_existing_data(output_filename)
232- # if isname:
233- # unique_names = set([str(name).lower() for name in unique_names if not pd.isna(name)])
234- # unique_names = set(unique_names) - set(existing_synonyms)
235- # print(f'Looking at {len(unique_names)} names')
236- # else:
237- # unique_names = set([str(name) for name in unique_names if not pd.isna(name)])
238- # unique_names = set(unique_names) - set(existing_pubchemids)
239- # print(f'Looking at {len(unique_names)} IDs')
240- # ignore_chem_set = set()
241- # if os.path.exists(ignore_chems):
242- # with open(ignore_chems, 'r') as file:
243- # for line in file:
244- # ignore_chem_set.add(line.strip())
245- # unique_names = list(set(unique_names) - ignore_chem_set)
246-
247- # print(f"{len(unique_names)} Drugs to search")
248- # for i in range(0, len(unique_names), batch_size):
249- # if not should_continue:
250- # break
251- # if unique_names[i] in existing_synonyms or unique_names[i] in existing_pubchemids:
252- # continue
253-
254- # batch = unique_names[i:i + batch_size]
255- # data = fetch_data_for_batch(batch, ignore_chems, isname)
256- # if data:
257- # file_exists = os.path.isfile(output_filename)
258- # mode = 'a' if file_exists else 'w'
259- # with open(output_filename, mode) as f:
260- # if not file_exists:
261- # f.write("improve_drug_id\tchem_name\tpubchem_id\tcanSMILES\tInChIKey\tformula\tweight\n")
262- # for entry in data:
263- # f.write(f"{entry['improve_drug_id']}\t{entry['name']}\t{entry.get('CID', '')}\t"
264- # f"{entry['SMILES']}\t{entry['InChIKey']}\t"
265- # f"{entry['MolecularFormula']}\t{entry['MolecularWeight']}\n")
266-
267- # with open(ignore_chems, "a") as ig_f:
268- # for entry in data:
269- # if isname:
270- # ig_f.write(f"{entry['name']}\n")
271- # else:
272- # ig_f.write(f"{entry.get('CID', '')}\n")
273-
274- # except Exception as e:
275- # print(f"An unexpected error occurred: {e}")
276- # finally:
277- # signal.alarm(0)
278-
279-
280-
281208
282209
283210def _load_prev_drugs_union (prevDrugFilepath : str ) -> pd .DataFrame :
@@ -497,8 +424,8 @@ def update_dataframe_and_write_tsv(unique_names,
497424 nums_comb = pd .to_numeric (extracted_comb , errors = "coerce" )
498425 if not nums_comb .empty :
499426 new_ids = set (combined .loc [nums_comb > previous_max , "improve_drug_id" ])
500- if new_ids :
501- print (f"Newly assigned improve_drug_id(s): { new_ids } " )
427+ # if new_ids:
428+ # print(f"Newly assigned improve_drug_id(s): {new_ids}")
502429
503430 # --- 9) union and filter final DataFrame by improve_drug_id(s) ---
504431 keep_ids = hit_ids .union (new_ids )
0 commit comments