|
1 | 1 | __author__ = "Yuri E. Corilo" |
2 | 2 | __date__ = "Jun 12, 2019" |
3 | 3 |
|
| 4 | +import numpy as np |
| 5 | + |
| 6 | +from corems.encapsulation.constant import Atoms |
4 | 7 | from corems.mass_spectrum.input.baseClass import MassListBaseClass |
5 | 8 | from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile, MassSpecCentroid |
6 | | -from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula |
| 9 | +from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula, MolecularFormulaIsotopologue |
7 | 10 | from corems.encapsulation.constant import Labels, Atoms |
8 | 11 | from corems.encapsulation.factory.processingSetting import DataInputSetting |
9 | 12 |
|
@@ -101,17 +104,81 @@ def add_molecular_formula(self, mass_spec_obj, dataframe): |
101 | 104 | atoms = list(formula_df.columns.astype(str)) |
102 | 105 | counts = list(formula_df.iloc[df_index].astype(int)) |
103 | 106 |
|
104 | | - formula_list = [sub[item] for item in range(len(atoms)) |
105 | | - for sub in [atoms, counts]] |
| 107 | + formula_dict = dict(zip(atoms, counts)) |
106 | 108 | if sum(counts) > 0: |
107 | 109 |
|
108 | 110 | ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index])) |
109 | 111 | if adduct_df is not None: |
110 | 112 | adduct_atom = str(adduct_df[df_index]) |
| 113 | + if adduct_atom == 'None': |
| 114 | + adduct_atom = None |
111 | 115 | else: |
112 | 116 | adduct_atom = None |
113 | | - mfobj = MolecularFormula(formula_list, int(ion_charge_df[df_index]), mspeak_parent=mass_spec_obj[ms_peak_index] , ion_type=ion_type, adduct_atom=adduct_atom) |
114 | | - mfobj.is_isotopologue = bool(is_isotopologue_df[df_index]) |
| 117 | + |
| 118 | + # If not isotopologue, cast as MolecularFormula |
| 119 | + if not bool(int(is_isotopologue_df[df_index])): |
| 120 | + mfobj = MolecularFormula( |
| 121 | + formula_dict, int(ion_charge_df[df_index]), |
| 122 | + mspeak_parent=mass_spec_obj[ms_peak_index] , |
| 123 | + ion_type=ion_type, adduct_atom=adduct_atom |
| 124 | + ) |
| 125 | + |
| 126 | + # if is isotopologue, recast as MolecularFormulaIsotopologue |
| 127 | + if bool(int(is_isotopologue_df[df_index])): |
| 128 | + |
| 129 | + # First make a MolecularFormula object for the parent so we can get probabilities etc |
| 130 | + formula_list_parent = {} |
| 131 | + for atom in formula_dict: |
| 132 | + if atom in Atoms.isotopes.keys(): |
| 133 | + formula_list_parent[atom] = formula_dict[atom] |
| 134 | + else: |
| 135 | + # remove any numbers from the atom name to cast as a mono-isotopic atom |
| 136 | + atom_mono = atom.strip('0123456789') |
| 137 | + if atom_mono in Atoms.isotopes.keys(): |
| 138 | + formula_list_parent[atom_mono] = formula_list_parent[atom_mono]+formula_dict[atom] |
| 139 | + else: |
| 140 | + print(f"Atom {atom} not in Atoms.atoms_order") |
| 141 | + mono_index = int(dataframe.iloc[df_index]['Mono Isotopic Index']) |
| 142 | + mono_mfobj = MolecularFormula( |
| 143 | + formula_list_parent, |
| 144 | + int(ion_charge_df[df_index]), |
| 145 | + mspeak_parent=mass_spec_obj[mono_index], |
| 146 | + ion_type=ion_type, |
| 147 | + adduct_atom=adduct_atom |
| 148 | + ) |
| 149 | + |
| 150 | + # Next, generate isotopologues from the parent |
| 151 | + isos = list( |
| 152 | + mono_mfobj.isotopologues( |
| 153 | + min_abundance = mass_spec_obj[df_index].abundance*0.1, |
| 154 | + current_mono_abundance = mass_spec_obj[mono_index].abundance, |
| 155 | + dynamic_range = mass_spec_obj.dynamic_range |
| 156 | + ) |
| 157 | + ) |
| 158 | + |
| 159 | + # Finally, find the isotopologue that matches the formula_dict |
| 160 | + matched_isos = isos |
| 161 | + for iso in isos: |
| 162 | + if set(iso.atoms) == set(formula_dict.keys()): |
| 163 | + # Check the values of the atoms match |
| 164 | + if all([iso[atom] == formula_dict[atom] for atom in formula_dict]): |
| 165 | + matched_isos = [iso] |
| 166 | + if len(matched_isos) > 1: |
| 167 | + raise ValueError("More than one isotopologue matched the formula_dict: {matched_isos}") |
| 168 | + if len(matched_isos) == 0: |
| 169 | + raise ValueError("No isotopologue matched the formula_dict") |
| 170 | + mfobj = matched_isos[0] |
| 171 | + |
| 172 | + # Record the index of the mono-isotopic peak on the formula object |
| 173 | + mfobj.mspeak_index_mono_isotopic = int(dataframe.iloc[df_index]['Mono Isotopic Index']) |
| 174 | + |
| 175 | + # Add the confidence score and isotopologue similarity and average MZ error score |
| 176 | + if 'm/z Error Score' in dataframe: |
| 177 | + mfobj._mass_error_average_score = float(dataframe.iloc[df_index]['m/z Error Score']) |
| 178 | + if 'Confidence Score' in dataframe: |
| 179 | + mfobj._confidence_score = float(dataframe.iloc[df_index]['Confidence Score']) |
| 180 | + if 'Isotopologue Similarity' in dataframe: |
| 181 | + mfobj._isotopologue_similarity = float(dataframe.iloc[df_index]['Isotopologue Similarity']) |
115 | 182 | mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj) |
116 | 183 |
|
117 | 184 |
|
|
0 commit comments