@@ -646,12 +646,14 @@ def __init__(self):
646646 super ().__init__ ()
647647
648648 # API endpoint for precursor m/z search
649+ # inputs = mz, tolerance (in Da), polarity, page_no, per_page
649650 self .PRECURSOR_MZ_URL = (
650- "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}"
651+ "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}?page={}&per_page={} "
651652 )
652653
653654 # API endpoint for returning full list of precursor m/z values in database
654- self .PRECURSOR_MZ_ALL_URL = "https://metabref.emsl.pnnl.gov/api/precursors/{}"
655+ # inputs = polarity, page_no, per_page
656+ self .PRECURSOR_MZ_ALL_URL = "https://metabref.emsl.pnnl.gov/api/precursors/{}?page={}&per_page={}"
655657
656658 self .__init_format_map__ ()
657659
@@ -674,7 +676,7 @@ def __init_format_map__(self):
674676 self .format_map ["fe" ] = self .format_map ["flashentropy" ]
675677 self .format_map ["flash-entropy" ] = self .format_map ["flashentropy" ]
676678
677- def query_by_precursor (self , mz_list , polarity , mz_tol_ppm , mz_tol_da_api = 0.2 ):
679+ def query_by_precursor (self , mz_list , polarity , mz_tol_ppm , mz_tol_da_api = 0.2 , max_per_page = 50 ):
678680 """
679681 Query MetabRef by precursor m/z values.
680682
@@ -690,6 +692,8 @@ def query_by_precursor(self, mz_list, polarity, mz_tol_ppm, mz_tol_da_api=0.2):
690692 mz_tol_da_api : float, optional
691693 Maximum tolerance between precursor m/z values for API search, in daltons.
692694 Used to group similar mzs into a single API query for speed. Default is 0.2.
695+ max_per_page : int, optional
696+ Maximum records to return from MetabRef API query at a time. Default is 50.
693697
694698 Returns
695699 -------
@@ -722,32 +726,59 @@ def query_by_precursor(self, mz_list, polarity, mz_tol_ppm, mz_tol_da_api=0.2):
722726 tol = (max (mz_group ) - min (mz_group )) / 2 + mz_tol_ppm ** - 6 * max (
723727 mz_group
724728 )
725- lib = lib + self .get_query (
726- self .PRECURSOR_MZ_URL .format (str (mz ), str (tol ), polarity )
729+
730+ # Get first page of results
731+ response = self .get_query (
732+ self .PRECURSOR_MZ_URL .format (str (mz ), str (tol ), polarity , 1 , max_per_page )
727733 )
734+ lib = lib + response ['results' ]
735+
736+ # If there are more pages of results, get them
737+ if response ['total_pages' ] > 1 :
738+ for i in np .arange (2 , response ['total_pages' ]+ 1 ):
739+ lib = lib + self .get_query (
740+ self .PRECURSOR_MZ_URL .format (str (mz ), str (tol ), polarity , i , max_per_page )
741+ )['results' ]
728742
729743 return lib
730744
def request_all_precursors(self, polarity, per_page=50000):
    """
    Request all precursor m/z values for MS2 spectra from MetabRef.

    The endpoint is paginated: the first page is fetched to learn the total
    number of pages, then the remaining pages are requested one by one. The
    ``precursor_ion`` value of every returned record is collected.

    Parameters
    ----------
    polarity : str
        Ionization polarity, either "positive" or "negative".
    per_page : int, optional
        Number of records to fetch per call. Default is 50000.

    Returns
    -------
    list
        Unique precursor m/z values, sorted from smallest to largest.

    Raises
    ------
    ValueError
        If polarity is anything other than "positive" or "negative".
    """
    # If polarity is anything other than positive or negative, raise error
    if polarity not in ["positive", "negative"]:
        raise ValueError("Polarity must be 'positive' or 'negative'")

    # Get first page of results and total number of pages of results
    response = self.get_query(
        self.PRECURSOR_MZ_ALL_URL.format(polarity, str(1), str(per_page))
    )
    total_pages = int(response["total_pages"])

    # Collect into a set so duplicates across pages are dropped as we go
    precursors = {record["precursor_ion"] for record in response["results"]}

    # Go through remaining pages of results (no-op when there is one page)
    for page_no in range(2, total_pages + 1):
        response = self.get_query(
            self.PRECURSOR_MZ_ALL_URL.format(polarity, str(page_no), str(per_page))
        )
        precursors.update(record["precursor_ion"] for record in response["results"])

    # Sort precursors from smallest to largest
    return sorted(precursors)
751782
752783 def get_lipid_library (
753784 self ,
@@ -789,14 +820,25 @@ def get_lipid_library(
789820
790821 """
791822 mz_list .sort ()
823+ mz_list = np .array (mz_list )
792824
793825 # Get all precursors in the library matching the polarity
794826 precusors_in_lib = self .request_all_precursors (polarity = polarity )
795- precusors_in_lib .sort ()
796827 precusors_in_lib = np .array (precusors_in_lib )
797828
798829 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library
799- mz_list = np .array (mz_list )
830+ lib_mz_df = pd .DataFrame (precusors_in_lib , columns = ["lib_mz" ])
831+ lib_mz_df ["closest_obs_mz" ] = mz_list [
832+ find_closest (mz_list , lib_mz_df .lib_mz .values )
833+ ]
834+ lib_mz_df ["mz_diff_ppm" ] = np .abs (
835+ (lib_mz_df ["lib_mz" ] - lib_mz_df ["closest_obs_mz" ])
836+ / lib_mz_df ["lib_mz" ]
837+ * 1e6
838+ )
839+ lib_mz_sub = lib_mz_df [lib_mz_df ["mz_diff_ppm" ] <= mz_tol_ppm ]
840+
841+ # Do the same in the opposite direction
800842 mz_df = pd .DataFrame (mz_list , columns = ["mass_feature_mz" ])
801843 mz_df ["closest_lib_pre_mz" ] = precusors_in_lib [
802844 find_closest (precusors_in_lib , mz_df .mass_feature_mz .values )
@@ -808,9 +850,15 @@ def get_lipid_library(
808850 )
809851 mz_df_sub = mz_df [mz_df ["mz_diff_ppm" ] <= mz_tol_ppm ]
810852
853+ # Use whichever subset is smaller (lib_mz_sub or mz_df_sub; ties use mz_df_sub) as the input for the next step
854+ if len (lib_mz_sub ) < len (mz_df_sub ):
855+ mzs_to_query = lib_mz_sub .lib_mz .values
856+ else :
857+ mzs_to_query = mz_df_sub .mass_feature_mz .values
858+
811859 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata
812860 lib = self .query_by_precursor (
813- mz_list = mz_df_sub . mass_feature_mz . values ,
861+ mz_list = mzs_to_query ,
814862 polarity = polarity ,
815863 mz_tol_ppm = mz_tol_ppm ,
816864 mz_tol_da_api = mz_tol_da_api ,
0 commit comments