Revert "start ability to log noise centroid data and add calibration mzs to msobj"

wkew-pnnl · wkew-pnnl · commit 0948f70e8ab1 · 2024-09-25T16:21:29.000-07:00
This reverts commit e65db2cd0f7ef9e8f0190ea40510016d75ae7902.
diff --git a/corems/mass_spectrum/calc/Calibration.py b/corems/mass_spectrum/calc/Calibration.py
@@ -427,8 +427,8 @@ def recalibrate_mass_spectrum(self, cal_peaks_mz : list[float], cal_refs_mz : li
             self.mass_spectrum.calibration_order = order
             self.mass_spectrum.calibration_RMS = float(res['fun'])
             self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
-            self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
-            self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
+            self.mass_spectrum.calib_ref_mzs = cal_refs_mz
+            #self.mass_spectrum.calib_meas_mzs = cal_refs_mz
 
             self.mass_spectrum.calibration_segment = self.mzsegment
 
diff --git a/corems/mass_spectrum/calc/NoiseCalc.py b/corems/mass_spectrum/calc/NoiseCalc.py
@@ -1,7 +1,7 @@
 import time
 from typing import Tuple
 
-from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan, asarray
+from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan
 #from scipy.signal import argrelmax
 from corems import chunks
 import warnings
@@ -148,7 +148,6 @@ def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]]
 
     def cut_mz_domain_noise(self):
         """Cut the m/z domain to the noise threshold regions.
-        Full profile data is used here.
 
         Returns
         -------
@@ -199,49 +198,6 @@ def cut_mz_domain_noise(self):
             # pyplot.show()
             return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index]
       
-    def cut_mz_domain_noise_centroid(self):
-        """Cut the m/z domain to the noise threshold regions.
-        Centroided  data is used here.
-
-        Returns
-        -------
-        Tuple[np.array, np.array]
-            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
-        """
-        mz_exp_tmp = asarray(self._mz_exp)
-        min_mz_whole_ms = mz_exp_tmp.min()
-        max_mz_whole_ms = mz_exp_tmp.max()
-
-        if self.settings.noise_threshold_method == 'minima':
-            
-            # this calculation is taking too long (about 2 seconds)
-            number_average_molecular_weight = self.weight_average_molecular_weight(
-                profile=True)
-           
-            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
-            # need to check min mz here or it will break
-            min_mz_noise = number_average_molecular_weight - 100
-            # need to check max mz here or it will break
-            max_mz_noise = number_average_molecular_weight + 100
-
-        else:
-            min_mz_noise = self.settings.noise_min_mz
-            max_mz_noise = self.settings.noise_max_mz
-
-        if min_mz_noise < min_mz_whole_ms:
-            min_mz_noise = min_mz_whole_ms
-
-        if max_mz_noise > max_mz_whole_ms:
-            max_mz_noise = max_mz_whole_ms
-
-        low_mz_index = (where(mz_exp_tmp >= min_mz_noise)[0][0])
-        high_mz_index = (where(mz_exp_tmp <= max_mz_noise)[-1][-1])
-
-        if high_mz_index > low_mz_index:
-            return self._mz_exp[high_mz_index:low_mz_index], self._abundance[low_mz_index:high_mz_index]
-        else:
-            return self._mz_exp[high_mz_index:low_mz_index], self._abundance[high_mz_index:low_mz_index]
-      
 
     def get_noise_average(self, ymincentroid):
         """ Get the average noise and standard deviation.
@@ -333,34 +289,33 @@ def run_log_noise_threshold_calc(self):
         """
 
         if self.is_centroid:
-            warnings.warn("log noise not tested for centroid data - proceed with caution")
-            mz_cut, abundance_cut = self.cut_mz_domain_noise_centroid()
+            raise  Exception("log noise Not tested for centroid data")
         else:
             # cut the spectrum to ROI
             mz_cut, abundance_cut = self.cut_mz_domain_noise()
-        # If there are 0 values, the log will fail
-        # But we may have negative values for aFT data, so we check if 0 exists
-        # Need to make a copy of the abundance cut values so we dont overwrite it....
-        tmp_abundance = abundance_cut.copy()
-        if 0 in tmp_abundance:
-            tmp_abundance[tmp_abundance==0] = nan
-            tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
-            # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. 
-            # Hard to generalise - needs more investigation.
-
-        # calculate a histogram of the log10 of the abundance data
-        hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) 
-        #find the apex of this histogram
-        maxvalidx = where(hist_values[0] == max(hist_values[0]))
-        # get the value of this apex (note - still in log10 units)
-        log_sigma = hist_values[1][maxvalidx]
-        # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
-        if len(log_sigma)>1:
-            log_sigma = average(log_sigma)
-        ## To do : check if aFT or mFT and adjust method
-        noise_mid = 10**log_sigma
-        noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
-        return float(noise_mid), float(noise_1std)
+            # If there are 0 values, the log will fail
+            # But we may have negative values for aFT data, so we check if 0 exists
+            # Need to make a copy of the abundance cut values so we dont overwrite it....
+            tmp_abundance = abundance_cut.copy()
+            if 0 in tmp_abundance:
+                tmp_abundance[tmp_abundance==0] = nan
+                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
+                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. 
+                # Hard to generalise - needs more investigation.
+
+            # calculate a histogram of the log10 of the abundance data
+            hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) 
+            #find the apex of this histogram
+            maxvalidx = where(hist_values[0] == max(hist_values[0]))
+            # get the value of this apex (note - still in log10 units)
+            log_sigma = hist_values[1][maxvalidx]
+            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
+            if len(log_sigma)>1:
+                log_sigma = average(log_sigma)
+            ## To do : check if aFT or mFT and adjust method
+            noise_mid = 10**log_sigma
+            noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
+            return float(noise_mid), float(noise_1std)
 
     def run_noise_threshold_calc(self):
         """ Runs noise threshold calculation (not log based method)
diff --git a/corems/mass_spectrum/calc/PeakPicking.py b/corems/mass_spectrum/calc/PeakPicking.py
@@ -612,7 +612,7 @@ def get_threshold(self, intes):
 
         elif noise_threshold_method == 'log':
             if self.is_centroid:
-                warnings.warn("log noise not tested for centroid data - proceed with caution")
+                raise  Exception("log noise Not tested for centroid data")
             abundance_threshold = self.settings.noise_threshold_log_nsigma
             factor = self.baseline_noise_std
 
diff --git a/corems/mass_spectrum/factory/MassSpectrumClasses.py b/corems/mass_spectrum/factory/MassSpectrumClasses.py
@@ -1,7 +1,6 @@
 from pathlib import Path
 from copy import deepcopy
 
-import warnings
 
 #from matplotlib import rcParamsDefault, rcParams
 from numpy import array, power, float64, where, histogram, trapz
@@ -64,10 +63,6 @@ class MassSpecBase(MassSpecCalc, KendrickGrouping):
         The root mean square of the mass spectrum's calibration.
     calibration_segment : None or CalibrationSegment
         The calibration segment of the mass spectrum.
-    calibration_ref_mzs: None or list
-        Reference masses used for calibration
-    calibration_meas_mzs: None or list
-        Measured masses used for calibration
     _abundance : ndarray
         The abundance values of the mass spectrum.
     _mz_exp : ndarray
@@ -126,8 +121,6 @@ def __init__(self, mz_exp, abundance, d_params, **kwargs):
         self.calibration_segment = None
         self.calibration_raw_error_median = None
         self.calibration_raw_error_stdev = None
-        self.calibration_ref_mzs = None
-        self.calibration_meas_mzs = None
 
     def _init_settings(self):
         """Initializes the settings for the mass spectrum."""
@@ -1504,13 +1497,6 @@ def process_mass_spec(self):
         #print("Loading mass spectrum object")
         
         abun = array(data_dict.get(Labels.abundance)).astype(float)
-
-        if self.label != Labels.thermo_centroid:
-            if self.settings.noise_threshold_method == 'log':
-                warnings.warn("log noise not tested for centroid data")
-                self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
-            else:
-                self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
         
         abundance_threshold, factor = self.get_threshold(abun)
         
@@ -1551,7 +1537,15 @@ def process_mass_spec(self):
         self._dynamic_range = self.max_abundance / self.min_abundance
         self._set_nominal_masses_start_final_indexes()
         
-
+        if self.label != Labels.thermo_centroid:
+            
+            if self.settings.noise_threshold_method == 'log':
+                
+                raise  Exception("log noise Not tested for centroid data")
+                #self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
+            
+            else:
+                self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
         
         del self.data_dict