Skip to content

Commit c868a8c

Browse files
committed
Merge branch 'masscalibration2024' into 'master'
Masscalibration2024 See merge request mass-spectrometry/corems!117
2 parents 9f221e2 + 977d449 commit c868a8c

8 files changed

Lines changed: 472 additions & 74 deletions

File tree

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 2.0.9
2+
current_version = 2.0.10
33
commit = False
44
tag = False
55

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ CoreMS aims to provide
5050

5151
## Current Version
5252

53-
`2.0.9`
53+
`2.0.10`
5454

5555
***
5656

@@ -323,7 +323,7 @@ UML (unified modeling language) diagrams for Direct Infusion FT-MS and GC-MS cla
323323
324324
If you use CoreMS in your work, please use the following citation:
325325
326-
Version [2.0.9 Release on GitHub](https://github.com/EMSL-Computing/CoreMS/releases/tag/v2.0.9), archived on Zenodo:
326+
Version [2.0.10 Release on GitHub](https://github.com/EMSL-Computing/CoreMS/releases/tag/v2.0.10), archived on Zenodo:
327327
328328
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4641552.svg)](https://doi.org/10.5281/zenodo.4641552)
329329

corems/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
__author__ = 'Yuri E. Corilo'
2-
__version__ = '2.0.9'
2+
__version__ = '2.0.10'
33
__doc__ = '''
44
<div align="left">
55

corems/encapsulation/factory/processingSetting.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class LiquidChromatographSetting:
135135
0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
136136
"""
137137

138-
scans: list | tuple = (0, 1)
138+
scans: list | tuple = (-1,-1)
139139

140140
eic_tolerance_ppm: float = 5
141141

@@ -224,6 +224,10 @@ class MassSpectrumSetting:
224224
Minimum ppm error to use for calibration. Default is -1.0.
225225
calib_sn_threshold : float, optional
226226
Signal to noise threshold to use for calibration. Default is 2.0.
227+
calibration_ref_match_method: string, optional
228+
Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
229+
calibration_ref_match_tolerance: float, optional
230+
Initial matching tolerance, in m/z units, used when calibration_ref_match_method is 'merged' (the new reference mass matching method). Default is 0.003.
227231
do_calibration : bool, optional
228232
If True, perform calibration. Default is True.
229233
"""
@@ -259,6 +263,10 @@ class MassSpectrumSetting:
259263
max_calib_ppm_error: float = 1.0
260264
min_calib_ppm_error: float = -1.0
261265
calib_sn_threshold: float = 2.0
266+
calibration_ref_match_method: str = 'legacy'
267+
calibration_ref_match_method_implemented: tuple = ('legacy', 'merged')
268+
calibration_ref_match_tolerance: float = 0.003
269+
calibration_ref_match_std_raw_error_limit: float = 1.5
262270
#calib_ref_mzs: list = [0]
263271

264272
do_calibration: bool = True

corems/mass_spectrum/calc/Calibration.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def load_ref_mass_list(self):
116116
3: 'Form2'
117117
}, axis=1)
118118

119-
df_ref.sort_values(by='m/z', ascending=False)
119+
df_ref.sort_values(by='m/z', ascending=True,inplace=True)
120120
print("Reference mass list loaded - " + str(len(df_ref)) + " calibration masses loaded.")
121121

122122
return df_ref
@@ -150,7 +150,10 @@ def gen_ref_mass_list_from_assigned(self, min_conf : float=0.7):
150150

151151
def find_calibration_points(self, df_ref,
152152
calib_ppm_error_threshold : tuple[float, float]=(-1, 1),
153-
calib_snr_threshold : float=5):
153+
calib_snr_threshold : float=5,
154+
calibration_ref_match_method : str='legacy',
155+
calibration_ref_match_tolerance : float=0.003,
156+
calibration_ref_match_std_raw_error_limit: float=1.5):
154157
"""Function to find calibration points in the mass spectrum
155158
156159
Based on the reference mass list.
@@ -186,23 +189,49 @@ def find_calibration_points(self, df_ref,
186189
peaks_mz.append(x.mz_exp)
187190
peaks_mz = np.asarray(peaks_mz)
188191

189-
cal_peaks_mz = []
190-
cal_refs_mz = []
191-
for mzref in df_ref['m/z']:
192-
tmp_peaks_mz = peaks_mz[abs(peaks_mz-mzref)<1]
193-
for mzmeas in tmp_peaks_mz:
194-
delta_mass = ((mzmeas-mzref)/mzref)*1e6
195-
if delta_mass < max(calib_ppm_error_threshold):
196-
if delta_mass > min(calib_ppm_error_threshold):
197-
cal_peaks_mz.append(mzmeas)
198-
cal_refs_mz.append(mzref)
199-
200-
# To remove entries with duplicated indices (reference masses matching multiple peaks)
201-
tmpdf = pd.Series(index = cal_refs_mz,data = cal_peaks_mz,dtype=float)
202-
tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
203-
204-
cal_peaks_mz = list(tmpdf.values)
205-
cal_refs_mz = list(tmpdf.index)
192+
if calibration_ref_match_method == 'legacy':
193+
# This legacy approach iterates through each reference mass and finds the measured peaks within 1 m/z and within the user-defined ppm error threshold.
194+
# Then it removes ambiguities - which means the calibration threshold has to be very tight.
195+
cal_peaks_mz = []
196+
cal_refs_mz = []
197+
for mzref in df_ref['m/z']:
198+
tmp_peaks_mz = peaks_mz[abs(peaks_mz-mzref)<1]
199+
for mzmeas in tmp_peaks_mz:
200+
delta_mass = ((mzmeas-mzref)/mzref)*1e6
201+
if delta_mass < max(calib_ppm_error_threshold):
202+
if delta_mass > min(calib_ppm_error_threshold):
203+
cal_peaks_mz.append(mzmeas)
204+
cal_refs_mz.append(mzref)
205+
206+
# To remove entries with duplicated indices (reference masses matching multiple peaks)
207+
tmpdf = pd.Series(index = cal_refs_mz,data = cal_peaks_mz,dtype=float)
208+
tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
209+
210+
cal_peaks_mz = list(tmpdf.values)
211+
cal_refs_mz = list(tmpdf.index)
212+
elif calibration_ref_match_method == 'merged':
213+
print('Using experimental new reference mass list merging')
214+
# This is a new approach (August 2024) which uses pandas 'merge_asof' to find the peaks closest in m/z between
215+
# reference and measured masses. This is a quicker way to match, and seems to get more matches.
216+
# It may not work as well when the data are far from the correct initial mass,
217+
# e.g. if the correct peak is further from the reference than an incorrect peak.
218+
meas_df = pd.DataFrame(columns=['meas_m/z'],data = peaks_mz)
219+
tolerance = calibration_ref_match_tolerance
220+
merged_df = pd.merge_asof(df_ref, meas_df, left_on='m/z', right_on = 'meas_m/z',tolerance=tolerance,direction='nearest')
221+
merged_df.dropna(how='any',inplace=True)
222+
merged_df['Error_ppm'] = ((merged_df['meas_m/z']-merged_df['m/z'])/merged_df['m/z'])*1e6
223+
median_raw_error = merged_df['Error_ppm'].median()
224+
std_raw_error = merged_df['Error_ppm'].std()
225+
if std_raw_error > calibration_ref_match_std_raw_error_limit:
226+
std_raw_error = calibration_ref_match_std_raw_error_limit
227+
self.mass_spectrum.calibration_raw_error_median = median_raw_error
228+
self.mass_spectrum.calibration_raw_error_stdev = std_raw_error
229+
merged_df= merged_df[(merged_df['Error_ppm']>(median_raw_error-1.5*std_raw_error))&(merged_df['Error_ppm']<(median_raw_error+1.5*std_raw_error))]
230+
#merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))]
231+
cal_peaks_mz = list(merged_df['meas_m/z'])
232+
cal_refs_mz = list(merged_df['m/z'])
233+
else:
234+
print(f'{calibration_ref_match_method} not allowed.')
206235

207236
if False:
208237
min_calib_ppm_error = calib_ppm_error_threshold[0]
@@ -420,6 +449,9 @@ def run(self):
420449
max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error
421450
min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error
422451
calib_pol_order = self.mass_spectrum.settings.calib_pol_order
452+
calibration_ref_match_method = self.mass_spectrum.settings.calibration_ref_match_method
453+
calibration_ref_match_tolerance = self.mass_spectrum.settings.calibration_ref_match_tolerance
454+
calibration_ref_match_std_raw_error_limit = self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit
423455

424456
# load reference mass list
425457
df_ref = self.load_ref_mass_list()
@@ -428,7 +460,10 @@ def run(self):
428460
cal_peaks_mz, cal_refs_mz = self.find_calibration_points(df_ref,
429461
calib_ppm_error_threshold=(min_calib_ppm_error,
430462
max_calib_ppm_error),
431-
calib_snr_threshold=calib_ppm_error_threshold)
463+
calib_snr_threshold=calib_ppm_error_threshold,
464+
calibration_ref_match_method = calibration_ref_match_method,
465+
calibration_ref_match_tolerance = calibration_ref_match_tolerance,
466+
calibration_ref_match_std_raw_error_limit = calibration_ref_match_std_raw_error_limit)
432467
if len(cal_peaks_mz)==2:
433468
self.mass_spectrum.settings.calib_pol_order = 1
434469
calib_pol_order = 1

corems/mass_spectrum/factory/MassSpectrumClasses.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ def __init__(self, mz_exp, abundance, d_params, **kwargs):
119119
self.calibration_points = None
120120
self.calibration_RMS = None
121121
self.calibration_segment = None
122+
self.calibration_raw_error_median = None
123+
self.calibration_raw_error_stdev = None
122124

123125
def _init_settings(self):
124126
"""Initializes the settings for the mass spectrum."""

examples/notebooks/Mass_Recalibration_Example.ipynb

Lines changed: 401 additions & 48 deletions
Large diffs are not rendered by default.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# This call to setup() does all the work
1515
setup(
1616
name="CoreMS",
17-
version="2.0.9",
17+
version="2.0.10",
1818
description="Mass Spectrometry Framework for Small Molecules Analysis",
1919
long_description=long_description,
2020
long_description_content_type="text/markdown",

0 commit comments

Comments
 (0)