def set_params(params: dict):
    """Return a copy of *params* with every None-valued entry removed.

    CMR rejects empty query parameters, so unset options are simply dropped
    before the request is built.
    """
    return {key: value for key, value in params.items() if value is not None}
def get_results(params: dict, headers: dict = None):
    """Run one CMR granule search and return the parsed JSON payload.

    Parameters
    ----------
    params : dict
        Raw search parameters; None-valued entries are stripped by
        set_params before the request is sent.
    headers : dict, optional
        Extra HTTP headers to pass through to the request.

    Returns
    -------
    dict
        The decoded JSON body from CMR's granules.json endpoint.
    """
    cmr_endpoint = "https://cmr.earthdata.nasa.gov/search/granules.json"
    response = requests.get(url=cmr_endpoint,
                            params=set_params(params),
                            headers=headers)
    return response.json()
100+
101+
102+ # def get_granules(params: dict):
103+ # time_start = np.array([]).astype('datetime64[ns]')
104+ # response, headers = get_results(params=params)
105+ # # scroll = headers['CMR-Scroll-Id']
106+ # hits = int(headers['CMR-Hits'])
107+ # if hits==0:
108+ # raise Exception("No granules matched your input parameters.")
109+ # df = pd.read_csv(StringIO(response.text))
110+ # while hits > df.index.size:
111+ # # response, _ = get_results(params=params, headers={'CMR-Scroll-Id': scroll})
112+ # response, _ = get_results(params=params)
113+ # data = pd.read_csv(StringIO(response.text))
114+ # df = pd.concat([df, data])
115+ # return df
116+
def get_granules(params: dict, ShortName: str, SingleDay_flag: bool):
    """Page through CMR until every matching granule has been collected.

    Parameters
    ----------
    params : dict
        CMR search parameters. params['temporal'] is a 'start,end' ISO string;
        it is advanced in place whenever a query hits CMR's 2000-entry page
        limit so the next query resumes after the last returned granule.
    ShortName : str
        Dataset short name; used to detect MONTHLY/DAILY datasets.
    SingleDay_flag : bool
        If True and the dataset is MONTHLY/DAILY, reduce a multi-granule
        result to the single granule nearest the requested start date.

    Returns
    -------
    (urls, sizes)
        Parallel lists: .nc download URLs and the matching granule sizes.

    Raises
    ------
    Exception
        If CMR reports an error, or answers with neither results nor errors.
    """
    time_start = np.array([]).astype('datetime64[ns]')
    urls = []
    sizes = []
    completed_query = False
    while not completed_query:
        response = get_results(params=params)
        if 'feed' in response.keys():
            for curr_entry in response['feed']['entry']:
                time_start = np.append(time_start,
                                       np.datetime64(curr_entry['time_start'], 'ns'))
                sizes.append(curr_entry['granule_size'])
                # take the first link that points at a .nc file.
                # BUG FIX: some CMR link entries carry no 'title' key; use
                # .get so a title-less link is skipped instead of raising
                # KeyError mid-query.
                for curr_link in curr_entry['links']:
                    if curr_link.get('title', '').endswith('.nc'):
                        urls.append(curr_link['href'])
                        break
        elif 'errors' in response.keys():
            raise Exception(response['errors'][0])
        else:
            # defensive: previously this fell through to a raw KeyError below
            raise Exception('Unexpected CMR response: ' + str(response))

        if len(response['feed']['entry']) < 2000:
            completed_query = True
        else:
            # do another CMR search since the previous search hit the allowed
            # maximum number of entries (2000): restart one day after the
            # last returned granule's end time
            params['temporal'] = str(np.datetime64(response['feed']['entry'][-1]['time_end'], 'D')
                                     + np.timedelta64(1, 'D')) + params['temporal'][10:]

    # reduce granule list to a single day if only one day in requested range.
    # NOTE(review): StartDate is read from the enclosing script's globals —
    # confirm it is defined before this function is called.
    # NOTE(review): day_index indexes time_start, which has one entry per
    # granule; this assumes every granule contributed a .nc URL above.
    if ('MONTHLY' in ShortName) or ('DAILY' in ShortName):
        if SingleDay_flag and (len(urls) > 1):
            day_index = np.argmin(np.abs(time_start - np.datetime64(StartDate, 'D')))
            urls = urls[day_index:(day_index + 1)]
            # BUG FIX: keep sizes aligned with urls; previously only urls was
            # sliced, leaving the two returned lists different lengths.
            sizes = sizes[day_index:(day_index + 1)]

    return urls, sizes
115150
116151
117152
@@ -148,48 +183,45 @@ def get_granules(params: dict):
148183 ### Query CMR for the desired ECCO Dataset
149184
150185 # grans means 'granules', PO.DAAC's term for individual files in a dataset
151- grans = get_granules (input_search_params )
152-
153-
154- ## Prepare results of query
155-
156- # reduce granule list to single day if only one day in requested range
157- if (('MONTHLY' in ShortName ) or ('DAILY' in ShortName )):
158- if ((SingleDay_flag == True ) and (len (grans ['Granule UR' ]) > 1 )):
159- day_index = np .argmin (np .abs (np .asarray (grans ['Start Time' ])\
160- .astype ('datetime64[ns]' ) - np .datetime64 (StartDate ,'D' )))
161- grans = grans [day_index :(day_index + 1 )]
162-
163- # convert the rows of the 'Online Access URLS' column to a Python list
164- urls = grans ['Online Access URLs' ].tolist ()
186+ urls ,gran_sizes = get_granules (input_search_params ,ShortName ,SingleDay_flag )
187+
188+ # ## Prepare results of query
189+ #
190+ # # reduce granule list to single day if only one day in requested range
191+ # if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
192+ # if ((SingleDay_flag == True) and (len(grans['Granule UR']) > 1)):
193+ # day_index = np.argmin(np.abs(np.asarray(grans['Start Time'])\
194+ # .astype('datetime64[ns]') - np.datetime64(StartDate,'D')))
195+ # grans = grans[day_index:(day_index+1)]
196+ #
197+ # # convert the rows of the 'Online Access URLS' column to a Python list
198+ # urls = grans['Online Access URLs'].tolist()
165199
166200 # estimate granule sizes where this info is missing from CMR
167- sizes = (2 ** 20 )* np .asarray (grans [ 'Size' ] ).astype ('float64' )
201+ sizes = (2 ** 20 )* np .asarray (gran_sizes ).astype ('float64' )
168202 sizes = np .where (sizes > (2 ** 10 ),sizes ,np .nan )
169203 if np .sum (~ np .isnan (sizes )) >= 1 :
170204 sizes = np .where (~ np .isnan (sizes ),sizes ,np .nanmean (sizes ))
171205 else :
172206 input_search_params ['temporal' ] = ['1992-01-01' ,'2017-12-31' ]
173- grans_all = get_granules (input_search_params )
207+ _ , gran_sizes_all = get_granules (input_search_params )
174208 sizes_all = (2 ** 20 )* np .asarray (grans_all ['Size' ]).astype ('float64' )
175209 sizes_all = np .where (sizes_all > (2 ** 10 ),sizes_all ,np .nan )
176210 sizes = np .where (~ np .isnan (sizes ),sizes ,np .nanmean (sizes_all ))
177211 sizes = list (sizes )
178- urls = grans ['Online Access URLs' ].tolist ()
212+ # urls = grans['Online Access URLs'].tolist()
179213
180214 # for snapshot datasets with monthly snapshot_interval, only include snapshots at beginning/end of months
181215 if 'SNAPSHOT' in ShortName :
182216 if snapshot_interval == 'monthly' :
183217 import re
184- urls_list_copy = list (tuple (urls ))
185- sizes_list_copy = list (tuple (sizes ))
186- for idx ,(url ,size ) in enumerate (zip (urls ,sizes )):
218+ url_sizes_dict = {url :size for url ,size in zip (urls ,sizes )}
219+ for url ,size in zip (urls ,sizes ):
187220 snapshot_date = re .findall ("_[0-9]{4}-[0-9]{2}-[0-9]{2}" ,url )[0 ][1 :]
188221 if snapshot_date [8 :] != '01' :
189- urls_list_copy .remove (url )
190- del sizes_list_copy [idx ]
191- urls = urls_list_copy
192- sizes = sizes_list_copy
222+ del url_sizes_dict [url ]
223+ urls = list (url_sizes_dict .keys ())
224+ sizes = list (url_sizes_dict .values ())
193225
194226 return urls ,sizes
195227
0 commit comments