Skip to content

Commit dffc59e

Browse files
committed
Merge branch 'corems_massspectra_importer_fix' into 'master'
Improve ReadCoreMSHDF_MassSpectrum import and export. Closes #152 and #153. See merge request mass-spectrometry/corems!122
2 parents ad375d0 + d377652 commit dffc59e

22 files changed

Lines changed: 12835 additions & 12679 deletions

NEG_ESI_SRFA_CoreMS_prob_score.csv

Lines changed: 0 additions & 21 deletions
This file was deleted.

NEG_ESI_SRFA_CoreMS_prob_score.json

Lines changed: 0 additions & 201 deletions
This file was deleted.

corems/mass_spectrum/input/coremsHDF5.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,6 @@ def __init__(self, file_location):
5252

5353
self.scans = list(self.h5pydata.keys())
5454

55-
print(self.scans)
56-
5755
def load_raw_data(self, mass_spectrum, scan_index=0):
5856
"""
5957
Load raw data into the mass spectrum object.
@@ -68,13 +66,16 @@ def load_raw_data(self, mass_spectrum, scan_index=0):
6866

6967
scan_label = self.scans[scan_index]
7068

71-
mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
69+
# Only load the raw data when the "raw_ms" entry in the scan is not empty
70+
if self.h5pydata[scan_label]["raw_ms"].shape is not None:
71+
72+
mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
7273

73-
abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
74+
abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
7475

75-
mass_spectrum.mz_exp_profile = mz_profile
76+
mass_spectrum.mz_exp_profile = mz_profile
7677

77-
mass_spectrum.abundance_profile = abundance_profile
78+
mass_spectrum.abundance_profile = abundance_profile
7879

7980
def get_mass_spectrum(
8081
self,
@@ -83,9 +84,12 @@ def get_mass_spectrum(
8384
auto_process=True,
8485
load_settings=True,
8586
load_raw=True,
87+
load_molecular_formula=True,
8688
):
8789
"""
88-
Get a mass spectrum object.
90+
Instantiate a mass spectrum object from the CoreMS HDF5 file.
91+
Note that this always returns a centroid mass spectrum object; functionality for profile and
92+
frequency mass spectra is not yet implemented.
8993
9094
Parameters
9195
----------
@@ -99,14 +103,23 @@ def get_mass_spectrum(
99103
Whether to load the settings into the mass spectrum object. Default is True.
100104
load_raw : bool, optional
101105
Whether to load the raw data into the mass spectrum object. Default is True.
106+
load_molecular_formula : bool, optional
107+
Whether to load the molecular formula into the mass spectrum object.
108+
Default is True.
102109
103110
Returns
104111
-------
105112
MassSpecCentroid
106113
The mass spectrum object.
114+
115+
Raises
116+
------
117+
ValueError
118+
If the CoreMS file is not valid.
119+
If the mass spectrum has not been processed and load_molecular_formula is True.
107120
"""
108-
109-
dataframe = self.get_dataframe(scan_number, time_index=time_index)
121+
scan_index = self.scans.index(str(scan_number))
122+
dataframe = self.get_dataframe(scan_index, time_index=time_index)
110123

111124
if not set(
112125
["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
@@ -117,23 +130,45 @@ def get_mass_spectrum(
117130

118131
dataframe.rename(columns=self.parameters.header_translate, inplace=True)
119132

133+
# Cast 'm/z' and 'Peak Height' to float
134+
dataframe["m/z"] = dataframe["m/z"].astype(float)
135+
dataframe["Peak Height"] = dataframe["Peak Height"].astype(float)
136+
120137
polarity = dataframe["Ion Charge"].values[0]
121138

122-
output_parameters = self.get_output_parameters(polarity, scan_index=scan_number)
139+
output_parameters = self.get_output_parameters(polarity, scan_index=scan_index)
123140

124141
mass_spec_obj = MassSpecCentroid(
125-
dataframe.to_dict(orient="list"), output_parameters
142+
dataframe.to_dict(orient="list"), output_parameters, auto_process = False
126143
)
127144

145+
if auto_process:
146+
# Set the noise threshold on the mass spectrum object to an absolute abundance of 0 so all peaks get added
147+
mass_spec_obj.settings.noise_threshold_method = "absolute_abundance"
148+
mass_spec_obj.settings.noise_threshold_absolute_abundance = 0
149+
mass_spec_obj.process_mass_spec()
150+
128151
if load_settings:
152+
# Load settings into the mass spectrum object
129153
self.load_settings(
130-
mass_spec_obj, scan_index=scan_number, time_index=time_index
154+
mass_spec_obj,
155+
scan_index=scan_index,
156+
time_index=time_index
131157
)
132158

133159
if load_raw:
134-
self.load_raw_data(mass_spec_obj, scan_index=scan_number)
135-
136-
self.add_molecular_formula(mass_spec_obj, dataframe)
160+
self.load_raw_data(
161+
mass_spec_obj,
162+
scan_index=scan_index
163+
)
164+
165+
if load_molecular_formula:
166+
if not auto_process:
167+
raise ValueError(
168+
"Can only add molecular formula if the mass spectrum has been processed"
169+
)
170+
else:
171+
self.add_molecular_formula(mass_spec_obj, dataframe)
137172

138173
return mass_spec_obj
139174

corems/mass_spectrum/output/export.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,8 @@ def to_hdf(self):
306306

307307
scan_group = hdf_handle.create_group(str(self.mass_spectrum.scan_number))
308308

309-
if list(self.mass_spectrum.abundance_profile):
309+
# If there is raw data (from profile data) save it
310+
if not self.mass_spectrum.is_centroid:
310311

311312
mz_abun_array = empty(shape=(2, len(self.mass_spectrum.abundance_profile)))
312313

0 commit comments

Comments
 (0)