|
1 | 1 | import time |
2 | 2 | from typing import Tuple |
3 | 3 |
|
4 | | -from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan, asarray |
| 4 | +from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan |
5 | 5 | #from scipy.signal import argrelmax |
6 | 6 | from corems import chunks |
7 | 7 | import warnings |
@@ -148,7 +148,6 @@ def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]] |
148 | 148 |
|
149 | 149 | def cut_mz_domain_noise(self): |
150 | 150 | """Cut the m/z domain to the noise threshold regions. |
151 | | - Full profile data is used here. |
152 | 151 |
|
153 | 152 | Returns |
154 | 153 | ------- |
@@ -199,49 +198,6 @@ def cut_mz_domain_noise(self): |
199 | 198 | # pyplot.show() |
200 | 199 | return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index] |
201 | 200 |
|
202 | | - def cut_mz_domain_noise_centroid(self): |
203 | | - """Cut the m/z domain to the noise threshold regions. |
204 | | - Centroided data is used here. |
205 | | -
|
206 | | - Returns |
207 | | - ------- |
208 | | - Tuple[np.array, np.array] |
209 | | - A tuple containing the m/z and abundance arrays of the truncated spectrum region. |
210 | | - """ |
211 | | - mz_exp_tmp = asarray(self._mz_exp) |
212 | | - min_mz_whole_ms = mz_exp_tmp.min() |
213 | | - max_mz_whole_ms = mz_exp_tmp.max() |
214 | | - |
215 | | - if self.settings.noise_threshold_method == 'minima': |
216 | | - |
217 | | - # this calculation is taking too long (about 2 seconds) |
218 | | - number_average_molecular_weight = self.weight_average_molecular_weight( |
219 | | - profile=True) |
220 | | - |
221 | | - # +-200 is a guess for testing only, it needs adjustment for each type of analysis |
222 | | - # need to check min mz here or it will break |
223 | | - min_mz_noise = number_average_molecular_weight - 100 |
224 | | - # need to check max mz here or it will break |
225 | | - max_mz_noise = number_average_molecular_weight + 100 |
226 | | - |
227 | | - else: |
228 | | - min_mz_noise = self.settings.noise_min_mz |
229 | | - max_mz_noise = self.settings.noise_max_mz |
230 | | - |
231 | | - if min_mz_noise < min_mz_whole_ms: |
232 | | - min_mz_noise = min_mz_whole_ms |
233 | | - |
234 | | - if max_mz_noise > max_mz_whole_ms: |
235 | | - max_mz_noise = max_mz_whole_ms |
236 | | - |
237 | | - low_mz_index = (where(mz_exp_tmp >= min_mz_noise)[0][0]) |
238 | | - high_mz_index = (where(mz_exp_tmp <= max_mz_noise)[-1][-1]) |
239 | | - |
240 | | - if high_mz_index > low_mz_index: |
241 | | - return self._mz_exp[high_mz_index:low_mz_index], self._abundance[low_mz_index:high_mz_index] |
242 | | - else: |
243 | | - return self._mz_exp[high_mz_index:low_mz_index], self._abundance[high_mz_index:low_mz_index] |
244 | | - |
245 | 201 |
|
246 | 202 | def get_noise_average(self, ymincentroid): |
247 | 203 | """ Get the average noise and standard deviation. |
@@ -333,34 +289,33 @@ def run_log_noise_threshold_calc(self): |
333 | 289 | """ |
334 | 290 |
|
335 | 291 | if self.is_centroid: |
336 | | - warnings.warn("log noise not tested for centroid data - proceed with caution") |
337 | | - mz_cut, abundance_cut = self.cut_mz_domain_noise_centroid() |
| 292 | + raise Exception("log noise Not tested for centroid data") |
338 | 293 | else: |
339 | 294 | # cut the spectrum to ROI |
340 | 295 | mz_cut, abundance_cut = self.cut_mz_domain_noise() |
341 | | - # If there are 0 values, the log will fail |
342 | | - # But we may have negative values for aFT data, so we check if 0 exists |
343 | | - # Need to make a copy of the abundance cut values so we dont overwrite it.... |
344 | | - tmp_abundance = abundance_cut.copy() |
345 | | - if 0 in tmp_abundance: |
346 | | - tmp_abundance[tmp_abundance==0] = nan |
347 | | - tmp_abundance = tmp_abundance[~isnan(tmp_abundance)] |
348 | | - # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. |
349 | | - # Hard to generalise - needs more investigation. |
350 | | - |
351 | | - # calculate a histogram of the log10 of the abundance data |
352 | | - hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) |
353 | | - #find the apex of this histogram |
354 | | - maxvalidx = where(hist_values[0] == max(hist_values[0])) |
355 | | - # get the value of this apex (note - still in log10 units) |
356 | | - log_sigma = hist_values[1][maxvalidx] |
357 | | - # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry |
358 | | - if len(log_sigma)>1: |
359 | | - log_sigma = average(log_sigma) |
360 | | - ## To do : check if aFT or mFT and adjust method |
361 | | - noise_mid = 10**log_sigma |
362 | | - noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463 |
363 | | - return float(noise_mid), float(noise_1std) |
| 296 | + # If there are 0 values, the log will fail |
| 297 | + # But we may have negative values for aFT data, so we check if 0 exists |
| 298 | + # Need to make a copy of the abundance cut values so we dont overwrite it.... |
| 299 | + tmp_abundance = abundance_cut.copy() |
| 300 | + if 0 in tmp_abundance: |
| 301 | + tmp_abundance[tmp_abundance==0] = nan |
| 302 | + tmp_abundance = tmp_abundance[~isnan(tmp_abundance)] |
| 303 | + # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. |
| 304 | + # Hard to generalise - needs more investigation. |
| 305 | + |
| 306 | + # calculate a histogram of the log10 of the abundance data |
| 307 | + hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) |
| 308 | + #find the apex of this histogram |
| 309 | + maxvalidx = where(hist_values[0] == max(hist_values[0])) |
| 310 | + # get the value of this apex (note - still in log10 units) |
| 311 | + log_sigma = hist_values[1][maxvalidx] |
| 312 | + # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry |
| 313 | + if len(log_sigma)>1: |
| 314 | + log_sigma = average(log_sigma) |
| 315 | + ## TODO: check if aFT or mFT and adjust method |
| 316 | + noise_mid = 10**log_sigma |
| 317 | + noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463 |
| 318 | + return float(noise_mid), float(noise_1std) |
364 | 319 |
|
365 | 320 | def run_noise_threshold_calc(self): |
366 | 321 | """ Runs noise threshold calculation (not log based method) |
|
0 commit comments