Skip to content

Commit 0948f70

Browse files
committed
Revert "start ability to log noise centroid data and add calibration mzs to msobj"
This reverts commit e65db2cd0f7ef9e8f0190ea40510016d75ae7902.
1 parent: 20d94ed; commit: 0948f70

4 files changed

Lines changed: 37 additions & 88 deletions

File tree

corems/mass_spectrum/calc/Calibration.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -427,8 +427,8 @@ def recalibrate_mass_spectrum(self, cal_peaks_mz : list[float], cal_refs_mz : li
427427
self.mass_spectrum.calibration_order = order
428428
self.mass_spectrum.calibration_RMS = float(res['fun'])
429429
self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
430-
self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
431-
self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
430+
self.mass_spectrum.calib_ref_mzs = cal_refs_mz
431+
#self.mass_spectrum.calib_meas_mzs = cal_refs_mz
432432

433433
self.mass_spectrum.calibration_segment = self.mzsegment
434434

corems/mass_spectrum/calc/NoiseCalc.py

Lines changed: 25 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import time
22
from typing import Tuple
33

4-
from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan, asarray
4+
from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan
55
#from scipy.signal import argrelmax
66
from corems import chunks
77
import warnings
@@ -148,7 +148,6 @@ def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]]
148148

149149
def cut_mz_domain_noise(self):
150150
"""Cut the m/z domain to the noise threshold regions.
151-
Full profile data is used here.
152151
153152
Returns
154153
-------
@@ -199,49 +198,6 @@ def cut_mz_domain_noise(self):
199198
# pyplot.show()
200199
return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index]
201200

202-
def cut_mz_domain_noise_centroid(self):
203-
"""Cut the m/z domain to the noise threshold regions.
204-
Centroided data is used here.
205-
206-
Returns
207-
-------
208-
Tuple[np.array, np.array]
209-
A tuple containing the m/z and abundance arrays of the truncated spectrum region.
210-
"""
211-
mz_exp_tmp = asarray(self._mz_exp)
212-
min_mz_whole_ms = mz_exp_tmp.min()
213-
max_mz_whole_ms = mz_exp_tmp.max()
214-
215-
if self.settings.noise_threshold_method == 'minima':
216-
217-
# this calculation is taking too long (about 2 seconds)
218-
number_average_molecular_weight = self.weight_average_molecular_weight(
219-
profile=True)
220-
221-
# +-200 is a guess for testing only, it needs adjustment for each type of analysis
222-
# need to check min mz here or it will break
223-
min_mz_noise = number_average_molecular_weight - 100
224-
# need to check max mz here or it will break
225-
max_mz_noise = number_average_molecular_weight + 100
226-
227-
else:
228-
min_mz_noise = self.settings.noise_min_mz
229-
max_mz_noise = self.settings.noise_max_mz
230-
231-
if min_mz_noise < min_mz_whole_ms:
232-
min_mz_noise = min_mz_whole_ms
233-
234-
if max_mz_noise > max_mz_whole_ms:
235-
max_mz_noise = max_mz_whole_ms
236-
237-
low_mz_index = (where(mz_exp_tmp >= min_mz_noise)[0][0])
238-
high_mz_index = (where(mz_exp_tmp <= max_mz_noise)[-1][-1])
239-
240-
if high_mz_index > low_mz_index:
241-
return self._mz_exp[high_mz_index:low_mz_index], self._abundance[low_mz_index:high_mz_index]
242-
else:
243-
return self._mz_exp[high_mz_index:low_mz_index], self._abundance[high_mz_index:low_mz_index]
244-
245201

246202
def get_noise_average(self, ymincentroid):
247203
""" Get the average noise and standard deviation.
@@ -333,34 +289,33 @@ def run_log_noise_threshold_calc(self):
333289
"""
334290

335291
if self.is_centroid:
336-
warnings.warn("log noise not tested for centroid data - proceed with caution")
337-
mz_cut, abundance_cut = self.cut_mz_domain_noise_centroid()
292+
raise Exception("log noise Not tested for centroid data")
338293
else:
339294
# cut the spectrum to ROI
340295
mz_cut, abundance_cut = self.cut_mz_domain_noise()
341-
# If there are 0 values, the log will fail
342-
# But we may have negative values for aFT data, so we check if 0 exists
343-
# Need to make a copy of the abundance cut values so we dont overwrite it....
344-
tmp_abundance = abundance_cut.copy()
345-
if 0 in tmp_abundance:
346-
tmp_abundance[tmp_abundance==0] = nan
347-
tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
348-
# It seems there are edge cases of sparse but high S/N data where the wrong values may be determined.
349-
# Hard to generalise - needs more investigation.
350-
351-
# calculate a histogram of the log10 of the abundance data
352-
hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins)
353-
#find the apex of this histogram
354-
maxvalidx = where(hist_values[0] == max(hist_values[0]))
355-
# get the value of this apex (note - still in log10 units)
356-
log_sigma = hist_values[1][maxvalidx]
357-
# If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
358-
if len(log_sigma)>1:
359-
log_sigma = average(log_sigma)
360-
## To do : check if aFT or mFT and adjust method
361-
noise_mid = 10**log_sigma
362-
noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
363-
return float(noise_mid), float(noise_1std)
296+
# If there are 0 values, the log will fail
297+
# But we may have negative values for aFT data, so we check if 0 exists
298+
# Need to make a copy of the abundance cut values so we dont overwrite it....
299+
tmp_abundance = abundance_cut.copy()
300+
if 0 in tmp_abundance:
301+
tmp_abundance[tmp_abundance==0] = nan
302+
tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
303+
# It seems there are edge cases of sparse but high S/N data where the wrong values may be determined.
304+
# Hard to generalise - needs more investigation.
305+
306+
# calculate a histogram of the log10 of the abundance data
307+
hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins)
308+
#find the apex of this histogram
309+
maxvalidx = where(hist_values[0] == max(hist_values[0]))
310+
# get the value of this apex (note - still in log10 units)
311+
log_sigma = hist_values[1][maxvalidx]
312+
# If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
313+
if len(log_sigma)>1:
314+
log_sigma = average(log_sigma)
315+
## To do : check if aFT or mFT and adjust method
316+
noise_mid = 10**log_sigma
317+
noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
318+
return float(noise_mid), float(noise_1std)
364319

365320
def run_noise_threshold_calc(self):
366321
""" Runs noise threshold calculation (not log based method)

corems/mass_spectrum/calc/PeakPicking.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -612,7 +612,7 @@ def get_threshold(self, intes):
612612

613613
elif noise_threshold_method == 'log':
614614
if self.is_centroid:
615-
warnings.warn("log noise not tested for centroid data - proceed with caution")
615+
raise Exception("log noise Not tested for centroid data")
616616
abundance_threshold = self.settings.noise_threshold_log_nsigma
617617
factor = self.baseline_noise_std
618618

corems/mass_spectrum/factory/MassSpectrumClasses.py

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from pathlib import Path
22
from copy import deepcopy
33

4-
import warnings
54

65
#from matplotlib import rcParamsDefault, rcParams
76
from numpy import array, power, float64, where, histogram, trapz
@@ -64,10 +63,6 @@ class MassSpecBase(MassSpecCalc, KendrickGrouping):
6463
The root mean square of the mass spectrum's calibration.
6564
calibration_segment : None or CalibrationSegment
6665
The calibration segment of the mass spectrum.
67-
calibration_ref_mzs: None or list
68-
Reference masses used for calibration
69-
calibration_meas_mzs: None or list
70-
Measured masses used for calibration
7166
_abundance : ndarray
7267
The abundance values of the mass spectrum.
7368
_mz_exp : ndarray
@@ -126,8 +121,6 @@ def __init__(self, mz_exp, abundance, d_params, **kwargs):
126121
self.calibration_segment = None
127122
self.calibration_raw_error_median = None
128123
self.calibration_raw_error_stdev = None
129-
self.calibration_ref_mzs = None
130-
self.calibration_meas_mzs = None
131124

132125
def _init_settings(self):
133126
"""Initializes the settings for the mass spectrum."""
@@ -1504,13 +1497,6 @@ def process_mass_spec(self):
15041497
#print("Loading mass spectrum object")
15051498

15061499
abun = array(data_dict.get(Labels.abundance)).astype(float)
1507-
1508-
if self.label != Labels.thermo_centroid:
1509-
if self.settings.noise_threshold_method == 'log':
1510-
warnings.warn("log noise not tested for centroid data")
1511-
self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1512-
else:
1513-
self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
15141500

15151501
abundance_threshold, factor = self.get_threshold(abun)
15161502

@@ -1551,7 +1537,15 @@ def process_mass_spec(self):
15511537
self._dynamic_range = self.max_abundance / self.min_abundance
15521538
self._set_nominal_masses_start_final_indexes()
15531539

1554-
1540+
if self.label != Labels.thermo_centroid:
1541+
1542+
if self.settings.noise_threshold_method == 'log':
1543+
1544+
raise Exception("log noise Not tested for centroid data")
1545+
#self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
1546+
1547+
else:
1548+
self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
15551549

15561550
del self.data_dict
15571551

0 commit comments

Comments
 (0)