Skip to content

Commit 581da35

Browse files
committed
Merge branch 'verbosity_fixes_dec2024' into 'master'
Verbosity fixes dec2024 See merge request mass-spectrometry/corems!146
2 parents b6de888 + 3c85422 commit 581da35

14 files changed

Lines changed: 146 additions & 86 deletions

File tree

corems/__init__.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,22 @@
2121

2222

2323

24-
def timeit(method):
25-
def timed(*args, **kw):
26-
ts = time.time()
27-
result = method(*args, **kw)
28-
te = time.time()
29-
if "log_time" in kw:
30-
name = kw.get("log_name", method.__name__.upper())
31-
kw["log_time"][name] = int((te - ts) * 1000)
32-
else:
33-
print("%r %2.2f ms" % (method.__name__, (te - ts) * 1000))
34-
return result
35-
36-
return timed
24+
def timeit(print_time=True):
    """Build a decorator that measures the wall-clock run time of a callable.

    Supports both ``@timeit(print_time=...)`` and the bare ``@timeit`` form.

    Parameters
    ----------
    print_time : bool or callable, optional
        Default for whether the elapsed time is printed. Default is True.
        A single call can override it by passing ``print_time=...`` as a
        keyword argument; that keyword is consumed by the wrapper and is
        never forwarded to the wrapped callable.
        If a callable is given (bare ``@timeit`` usage), it is wrapped
        directly with the default ``print_time=True``.

    Returns
    -------
    callable
        The decorator, or the already wrapped callable for bare usage.

    Notes
    -----
    When the call carries a ``log_time`` keyword, the elapsed time in whole
    milliseconds is stored in that mapping under ``log_name`` (falling back
    to the upper-cased function name) and nothing is printed. ``log_time``
    and ``log_name`` are forwarded to the wrapped callable unchanged.
    """
    from functools import wraps

    # Bare ``@timeit``: the "argument" is the function to decorate.
    if callable(print_time):
        return timeit()(print_time)

    def decorator(method):
        @wraps(method)  # keep the wrapped callable's name and docstring
        def timed(*args, **kw):
            # Per-call override; popped so the wrapped callable never sees it.
            local_print_time = kw.pop("print_time", print_time)
            # perf_counter is monotonic, so the interval cannot go negative
            # if the system clock is adjusted while the callable runs.
            ts = time.perf_counter()
            result = method(*args, **kw)
            elapsed_ms = (time.perf_counter() - ts) * 1000
            if "log_time" in kw:
                name = kw.get("log_name", method.__name__.upper())
                kw["log_time"][name] = int(elapsed_ms)
            elif local_print_time:
                print("%r %2.2f ms" % (method.__name__, elapsed_ms))
            return result

        return timed

    return decorator
3740

3841

3942
class SuppressPrints:

corems/encapsulation/factory/parameters.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
)
1414
from corems.encapsulation.factory.processingSetting import DataInputSetting
1515

16+
def hush_output():
    """Set verbose_processing to False on the MSParameters, GCMSParameters and LCMSParameters classes.

    The flag is switched off on the molecular_search and mass_spectrum
    settings of MSParameters, the gc_ms settings of GCMSParameters and the
    lc_ms settings of LCMSParameters.
    """
    quiet_targets = (
        MSParameters.molecular_search,
        MSParameters.mass_spectrum,
        GCMSParameters.gc_ms,
        LCMSParameters.lc_ms,
    )
    for settings in quiet_targets:
        settings.verbose_processing = False
1622

1723
def reset_ms_parameters():
1824
"""Reset the MSParameter class to the default values"""

corems/encapsulation/factory/processingSetting.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,7 +816,10 @@ class MolecularFormulaSearchSettings:
816816
m/z error average. Default is 0.0.
817817
used_atom_valences : dict, optional
818818
Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
819+
verbose_processing: bool, optional
820+
If True, print verbose processing information. Default is True.
819821
"""
822+
verbose_processing: bool = True
820823

821824
use_isotopologue_filter: bool = False
822825

corems/mass_spectrum/calc/Calibration.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def __init__(self, mass_spectrum, ref_masslist, mzsegment=None):
6565

6666
# define reference mass list - bruker .ref format
6767
self.ref_mass_list_path = ref_masslist
68-
if self.mass_spectrum.percentile_assigned()[0] != 0:
68+
if self.mass_spectrum.percentile_assigned(mute_output=True)[0] != 0:
6969
warnings.warn(
7070
"Warning: calibrating spectra which have already been assigned may yield erroneous results"
7171
)
@@ -77,10 +77,6 @@ def __init__(self, mass_spectrum, ref_masslist, mzsegment=None):
7777
"MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
7878
)
7979

80-
print(
81-
"MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
82-
)
83-
8480
def load_ref_mass_list(self):
8581
"""Load reference mass list (Bruker format)
8682
@@ -228,7 +224,7 @@ def find_calibration_points(
228224
cal_peaks_mz = list(tmpdf.values)
229225
cal_refs_mz = list(tmpdf.index)
230226
elif calibration_ref_match_method == "merged":
231-
warnings.warn("Using experimental new reference mass list merging")
227+
#warnings.warn("Using experimental new reference mass list merging")
232228
# This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between
233229
# reference and measured masses. This is a quicker way to match, and seems to get more matches.
234230
# It may not work as well when the data are far from the correct initial mass

corems/mass_spectrum/calc/MassErrorPrediction.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@ def get_results(self):
9090

9191
def calc_error_dist(self):
9292
"""Calculate the error distribution."""
93+
verbose = self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing
9394
results_list = []
9495

9596
indexes_without_results = list(range(len(self.mass_spectrum_obj)))
9697
# loop through mass spectrum
97-
98-
for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)):
98+
for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj), disable=not verbose):
9999
# access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1)
100100
# check lower and upper boundaries so as not to exceed the mass spectrum range
101101

corems/mass_spectrum/calc/MassSpectrumCalc.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,16 @@ class MassSpecCalc(PeakPicking, NoiseThresholdCalc):
3838
Calculate the weight average molecular weight
3939
"""
4040

41-
def percentile_assigned(self, report_error: bool = False):
41+
def percentile_assigned(self, report_error: bool = False, mute_output: bool = False):
4242
"""Percentage of peaks which are assigned
4343
4444
Parameters
4545
-----------
4646
report_error: bool, optional
4747
Report the error of the assigned peaks. Default is False.
48+
mute_output: bool, optional
49+
Override the verbose setting. Default is False.
50+
If True, the function will silence results
4851
"""
4952
verbose = self.parameters.mass_spectrum.verbose_processing
5053
assign_abun = 0
@@ -68,15 +71,15 @@ def percentile_assigned(self, report_error: bool = False):
6871
total_relative_abundance = (assign_abun / (not_assign_abun + assign_abun)) * 100
6972
if report_error:
7073
rms_error = sqrt(mean(array(error) ** 2))
71-
if verbose:
74+
if verbose and not mute_output:
7275
print(
7376
"%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f"
7477
% (i, j, total_percent, total_relative_abundance, rms_error)
7578
)
7679
return i, j, total_percent, total_relative_abundance, rms_error
7780

7881
else:
79-
if verbose:
82+
if verbose and not mute_output:
8083
print(
8184
"%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%"
8285
% (

corems/molecular_id/factory/MolecularLookupTable.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -364,14 +364,17 @@ def add_carbonsHydrogens(self, settings, existing_classes_objs):
364364
self.sql_db.session.execute(insert_query)
365365
self.sql_db.session.commit()
366366

367-
@timeit
368-
def runworker(self, molecular_search_settings):
367+
@timeit(print_time=True)
368+
def runworker(self, molecular_search_settings, **kwargs):
369369
"""Run the molecular formula lookup table worker.
370370
371371
Parameters
372372
----------
373373
molecular_search_settings : object
374374
An object containing user-defined settings.
375+
kwargs : dict
376+
A dictionary of keyword arguments.
377+
Most notably, the print_time argument which is passed to the timeit decorator.
375378
376379
Returns
377380
-------
@@ -380,6 +383,7 @@ def runworker(self, molecular_search_settings):
380383
381384
382385
"""
386+
verbose = molecular_search_settings.verbose_processing
383387

384388
classes_list, class_to_create, existing_classes_objs = (
385389
self.check_database_get_class_list(molecular_search_settings)
@@ -412,7 +416,7 @@ def runworker(self, molecular_search_settings):
412416
self.even_ch_dbe = [obj.dbe for obj in even_ch_obj]
413417

414418
all_results = list()
415-
for class_tuple in tqdm(class_to_create):
419+
for class_tuple in tqdm(class_to_create, disable = not verbose):
416420
results = self.populate_combinations(class_tuple, settings)
417421
all_results.extend(results)
418422
if settings.db_jobs == 1:
@@ -438,8 +442,8 @@ def runworker(self, molecular_search_settings):
438442
]
439443
p = multiprocessing.Pool(settings.db_jobs)
440444
for class_list in tqdm(
441-
p.imap_unordered(insert_database_worker, worker_args)
442-
):
445+
p.imap_unordered(insert_database_worker, worker_args), disable= not verbose
446+
):
443447
pass
444448
p.close()
445449
p.join()

corems/molecular_id/factory/molecularSQL.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,7 @@ def get_dict_by_classes(
473473
if the number of classes and nominal_m/zs are higher than 999 the query will fail
474474
Solution: use postgres or split query
475475
"""
476+
verbose = molecular_search_settings.verbose_processing
476477

477478
def query_normal(class_list, len_adduct):
478479
"""query for normal database
@@ -565,10 +566,7 @@ def nominal_mass_by_ion_type(formula_obj):
565566

566567
elif ion_type == Labels.adduct_ion and adduct_atom:
567568
return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
568-
569-
for formula_obj, ch_obj, classe_obj in tqdm.tqdm(
570-
formulas, desc="Loading molecular formula database"
571-
):
569+
for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database", disable = not verbose):
572570
nominal_mz = nominal_mass_by_ion_type(formula_obj)
573571

574572
if self.type != "normal":

corems/molecular_id/search/compoundSearch.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi):
127127

128128
return spectral_similarity_scores, ri_score, similarity_score
129129

130-
# @timeit
131130
def run(self):
132131
"""Runs the low-resolution mass spectral match."""
133132
# TODO select the best gcms peak
@@ -148,8 +147,8 @@ def run(self):
148147
self.gcms_obj.chromatogram_settings.use_deconvolution = (
149148
original_use_deconvolution
150149
)
151-
152-
for gc_peak in tqdm.tqdm(self.gcms_obj):
150+
verbose = self.gcms_obj.chromatogram_settings.verbose_processing
151+
for gc_peak in tqdm.tqdm(self.gcms_obj, disable = not verbose):
153152
if not self.calibration:
154153
window = self.gcms_obj.molecular_search_settings.ri_search_range
155154

corems/molecular_id/search/molecularFormulaSearch.py

Lines changed: 49 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,10 @@ def get_formulas(nominal_overlay: float = 0.1):
191191

192192
def run_worker_mass_spectrum(self):
193193
"""Run the molecular formula search on the mass spectrum object."""
194-
self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance())
194+
self.run_molecular_formula(
195+
self.mass_spectrum_obj.sort_by_abundance(),
196+
print_time=self.mass_spectrum_obj.molecular_search_settings.verbose_processing
197+
)
195198

196199
def run_worker_ms_peaks(self, ms_peaks):
197200
"""Run the molecular formula search on the given list of mass spectrum peaks.
@@ -201,7 +204,10 @@ def run_worker_ms_peaks(self, ms_peaks):
201204
ms_peaks : list of MSPeak
202205
The list of mass spectrum peaks.
203206
"""
204-
self.run_molecular_formula(ms_peaks)
207+
self.run_molecular_formula(
208+
ms_peaks,
209+
print_time=self.mass_spectrum_obj.molecular_search_settings.verbose_processing
210+
)
205211

206212
@staticmethod
207213
def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):
@@ -262,14 +268,18 @@ def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charg
262268

263269
return dict_res
264270

265-
@timeit
266-
def run_molecular_formula(self, ms_peaks):
271+
@timeit(print_time=True)
272+
def run_molecular_formula(self, ms_peaks, **kwargs):
267273
"""Run the molecular formula search on the given list of mass spectrum peaks.
268274
269275
Parameters
270276
----------
271277
ms_peaks : list of MSPeak
272278
The list of mass spectrum peaks.
279+
**kwargs
280+
Additional keyword arguments.
281+
Most notably, print_time, which is a boolean flag to indicate whether to print the time
282+
and passed to the timeit decorator.
273283
"""
274284
# number_of_process = multiprocessing.cpu_count()
275285

@@ -288,14 +298,16 @@ def run_molecular_formula(self, ms_peaks):
288298
# needs to improve to bin by mass defect instead, faster db creation and faster search execution time
289299
nominal_mzs = self.mass_spectrum_obj.nominal_mz
290300

301+
verbose = self.mass_spectrum_obj.molecular_search_settings.verbose_processing
291302
# reset average error, only relevant if the average mass error method is being used
292303
SearchMolecularFormulaWorker(
293304
find_isotopologues=self.find_isotopologues
294305
).reset_error(self.mass_spectrum_obj)
295306

296307
# check database for all possible molecular formula combinations based on the setting passed to self.mass_spectrum_obj.molecular_search_settings
297308
classes = MolecularCombinations(self.sql_db).runworker(
298-
self.mass_spectrum_obj.molecular_search_settings
309+
self.mass_spectrum_obj.molecular_search_settings,
310+
print_time=self.mass_spectrum_obj.molecular_search_settings.verbose_processing
299311
)
300312

301313
# split the database load to not blowout the memory
@@ -315,22 +327,20 @@ def run():
315327
self.mass_spectrum_obj.molecular_search_settings,
316328
ion_charge,
317329
)
318-
319-
pbar = tqdm.tqdm(classe_chunk)
320-
330+
pbar = tqdm.tqdm(classe_chunk, disable = not verbose)
321331
for classe_tuple in pbar:
322332
# class string is a json serialized dict
323333
classe_str = classe_tuple[0]
324334
classe_dict = classe_tuple[1]
325335

326336
if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
327337
ion_type = Labels.protonated_de_ion
328-
329-
pbar.set_description_str(
330-
desc="Started molecular formula search for class %s, (de)protonated "
331-
% classe_str,
332-
refresh=True,
333-
)
338+
if verbose:
339+
pbar.set_description_str(
340+
desc="Started molecular formula search for class %s, (de)protonated "
341+
% classe_str,
342+
refresh=True,
343+
)
334344

335345
candidate_formulas = dict_res.get(ion_type).get(classe_str)
336346

@@ -344,11 +354,12 @@ def run():
344354
)
345355

346356
if self.mass_spectrum_obj.molecular_search_settings.isRadical:
347-
pbar.set_description_str(
348-
desc="Started molecular formula search for class %s, radical "
349-
% classe_str,
350-
refresh=True,
351-
)
357+
if verbose:
358+
pbar.set_description_str(
359+
desc="Started molecular formula search for class %s, radical "
360+
% classe_str,
361+
refresh=True,
362+
)
352363

353364
ion_type = Labels.radical_ion
354365

@@ -365,11 +376,12 @@ def run():
365376
# looks for adduct, used_atom_valences should be 0
366377
# this code does not support H exchange by halogen atoms
367378
if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
368-
pbar.set_description_str(
369-
desc="Started molecular formula search for class %s, adduct "
370-
% classe_str,
371-
refresh=True,
372-
)
379+
if verbose:
380+
pbar.set_description_str(
381+
desc="Started molecular formula search for class %s, adduct "
382+
% classe_str,
383+
refresh=True,
384+
)
373385

374386
ion_type = Labels.adduct_ion
375387
dict_atoms_formulas = dict_res.get(ion_type)
@@ -904,18 +916,24 @@ def run_untargeted_worker_ms1(self):
904916
# do molecular formula based on the parameters set for ms1 search
905917
for peak in self.lcms_obj:
906918
self.mass_spectrum_obj = peak.mass_spectrum
907-
self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance())
919+
self.run_molecular_formula(
920+
peak.mass_spectrum.sort_by_abundance(),
921+
print_time=self.lcms_obj.parameters.lc_ms.verbose_processing
922+
)
908923

909924
def run_target_worker_ms1(self):
910925
"""Run targeted molecular formula search on the ms1 mass spectrum."""
911926
# do molecular formula based on the external molecular reference list
912-
pbar = tqdm.tqdm(self.lcms_obj)
927+
verbose = self.lcms_obj.parameters.lc_ms.verbose_processing
928+
if verbose:
929+
pbar = tqdm.tqdm(self.lcms_obj)
913930

914931
for peak in self.lcms_obj:
915-
pbar.set_description_str(
916-
desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s",
917-
refresh=True,
918-
)
932+
if verbose:
933+
pbar.set_description_str(
934+
desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s",
935+
refresh=True,
936+
)
919937

920938
self.mass_spectrum_obj = peak.mass_spectrum
921939

@@ -924,7 +942,7 @@ def run_target_worker_ms1(self):
924942
candidate_formulas = peak.targeted_molecular_formulas
925943

926944
for i in candidate_formulas:
927-
if self.lcms_obj.parameters.lc_ms.verbose_processing:
945+
if verbose:
928946
print(i)
929947
if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
930948
ion_type = Labels.protonated_de_ion

0 commit comments

Comments
 (0)