Skip to content

Commit 24317c4

Browse files
committed
Modify lipid workflow to use bulk molecular formula search
1 parent: a902c08 · commit: 24317c4

1 file changed

Lines changed: 4 additions & 32 deletions

File tree

support_code/nmdc/lipidomics/lipidomics_workflow.py

Lines changed: 4 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
from corems.mass_spectra.input.mzml import MZMLSpectraParser
2121
from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader
2222
from corems.mass_spectra.output.export import LipidomicsExport
23-
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas
23+
from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas, SearchMolecularFormulasLC
2424
from corems.molecular_id.search.database_interfaces import MetabRefLCInterface
2525
from corems.encapsulation.input.parameter_from_json import (
2626
load_and_set_toml_parameters_lcms,
@@ -193,35 +193,8 @@ def molecular_formula_search(myLCMSobj):
193193
-------
194194
None, processes the LCMS object
195195
"""
196-
i = 1
197-
# get df of mass features
198-
mf_df = myLCMSobj.mass_features_to_df()
199-
200-
# search molecular formulas for each mass feature
201-
total_decon_parent = sum(mf_df.mass_spectrum_deconvoluted_parent)
202-
for mf_id in mf_df.index:
203-
if myLCMSobj.mass_features[mf_id].mass_spectrum_deconvoluted_parent:
204-
if i > 10: # TODO KRH: remove this when ready
205-
break
206-
print("searching mf: ", str(i), " of ", str(total_decon_parent))
207-
208-
scan = myLCMSobj.mass_features[mf_id].apex_scan
209-
# Search single spectrum for all peaks that correspond to the same scan
210-
mf_df_scan = mf_df[mf_df.apex_scan == scan]
211-
peaks_to_search = [
212-
myLCMSobj.mass_features[x].ms1_peak for x in mf_df_scan.index.tolist()
213-
]
214-
time_start = time.time()
215-
SearchMolecularFormulas(
216-
myLCMSobj._ms[scan],
217-
first_hit=False,
218-
find_isotopologues=True,
219-
).run_worker_ms_peaks(peaks_to_search)
220-
print(
221-
"time to search whole spectrum for all peaks in scan: ",
222-
time.time() - time_start,
223-
)
224-
i += 1
196+
mol_search = SearchMolecularFormulasLC(myLCMSobj)
197+
mol_search.run_mass_feature_search()
225198
print("Finished molecular search")
226199

227200

@@ -634,15 +607,14 @@ def run_lipid_workflow(
634607
mz_dicts = pool.starmap(run_lipid_ms2, args)
635608
pool.close()
636609
pool.join()
637-
638610
print("Finished processing, data are written in " + str(out_dir))
639611

640612

641613
if __name__ == "__main__":
642614
# Set input variables to run
643615
cores = 1
644616
file_dir = Path("tmp_data/thermo_raw_mini")
645-
out_dir = Path("tmp_data/NMDC_processed_241113")
617+
out_dir = Path("tmp_data/_test_241216")
646618
params_toml = Path("tmp_data/EMSL_lipidomics_params.toml")
647619
metab_ref_token = Path("tmp_data/thermo_raw_collection/metabref.token")
648620
verbose = True

0 commit comments

Comments
 (0)