|
20 | 20 | from corems.mass_spectra.input.mzml import MZMLSpectraParser |
21 | 21 | from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader |
22 | 22 | from corems.mass_spectra.output.export import LipidomicsExport |
23 | | -from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas |
| 23 | +from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas, SearchMolecularFormulasLC |
24 | 24 | from corems.molecular_id.search.database_interfaces import MetabRefLCInterface |
25 | 25 | from corems.encapsulation.input.parameter_from_json import ( |
26 | 26 | load_and_set_toml_parameters_lcms, |
@@ -193,35 +193,8 @@ def molecular_formula_search(myLCMSobj): |
193 | 193 | ------- |
194 | 194 | None, processes the LCMS object |
195 | 195 | """ |
196 | | - i = 1 |
197 | | - # get df of mass features |
198 | | - mf_df = myLCMSobj.mass_features_to_df() |
199 | | - |
200 | | - # search molecular formulas for each mass feature |
201 | | - total_decon_parent = sum(mf_df.mass_spectrum_deconvoluted_parent) |
202 | | - for mf_id in mf_df.index: |
203 | | - if myLCMSobj.mass_features[mf_id].mass_spectrum_deconvoluted_parent: |
204 | | - if i > 10: # TODO KRH: remove this when ready |
205 | | - break |
206 | | - print("searching mf: ", str(i), " of ", str(total_decon_parent)) |
207 | | - |
208 | | - scan = myLCMSobj.mass_features[mf_id].apex_scan |
209 | | - # Search single spectrum for all peaks that correspond to the same scan |
210 | | - mf_df_scan = mf_df[mf_df.apex_scan == scan] |
211 | | - peaks_to_search = [ |
212 | | - myLCMSobj.mass_features[x].ms1_peak for x in mf_df_scan.index.tolist() |
213 | | - ] |
214 | | - time_start = time.time() |
215 | | - SearchMolecularFormulas( |
216 | | - myLCMSobj._ms[scan], |
217 | | - first_hit=False, |
218 | | - find_isotopologues=True, |
219 | | - ).run_worker_ms_peaks(peaks_to_search) |
220 | | - print( |
221 | | - "time to search whole spectrum for all peaks in scan: ", |
222 | | - time.time() - time_start, |
223 | | - ) |
224 | | - i += 1 |
| 196 | + mol_search = SearchMolecularFormulasLC(myLCMSobj) |
| 197 | + mol_search.run_mass_feature_search() |
225 | 198 | print("Finished molecular search") |
226 | 199 |
|
227 | 200 |
|
@@ -634,15 +607,14 @@ def run_lipid_workflow( |
634 | 607 | mz_dicts = pool.starmap(run_lipid_ms2, args) |
635 | 608 | pool.close() |
636 | 609 | pool.join() |
637 | | - |
638 | 610 | print("Finished processing, data are written in " + str(out_dir)) |
639 | 611 |
|
640 | 612 |
|
641 | 613 | if __name__ == "__main__": |
642 | 614 | # Set input variables to run |
643 | 615 | cores = 1 |
644 | 616 | file_dir = Path("tmp_data/thermo_raw_mini") |
645 | | - out_dir = Path("tmp_data/NMDC_processed_241113") |
| 617 | + out_dir = Path("tmp_data/_test_241216") |
646 | 618 | params_toml = Path("tmp_data/EMSL_lipidomics_params.toml") |
647 | 619 | metab_ref_token = Path("tmp_data/thermo_raw_collection/metabref.token") |
648 | 620 | verbose = True |
|
0 commit comments