Skip to content

Commit 4083692

Browse files
committed
corrected handling of snapshots at monthly intervals and changed CMR queries to json, for download modes
1 parent 611c162 commit 4083692

1 file changed

Lines changed: 74 additions & 42 deletions

File tree

ecco_access/ecco_download.py

Lines changed: 74 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -93,25 +93,60 @@ def set_params(params: dict):
9393
return {par: val for par, val in params.items() if val is not None}
9494

9595
def get_results(params: dict, headers: dict=None, timeout: float=60):
    """Query the CMR granule search endpoint and return the parsed JSON body.

    Parameters
    ----------
    params : dict
        CMR search parameters; entries with value None are dropped
        by set_params before the request is sent.
    headers : dict, optional
        Extra HTTP headers to send with the request.
    timeout : float, optional
        Seconds to wait for the CMR server before raising
        requests.exceptions.Timeout. Added so a stalled endpoint
        cannot hang a download session indefinitely.

    Returns
    -------
    dict
        Parsed JSON response; contains a 'feed' key on success or an
        'errors' key when CMR rejects the query (get_granules checks
        for both).
    """
    # NOTE: intentionally no raise_for_status() here -- CMR reports
    # query problems as a JSON body with an 'errors' key, which the
    # caller inspects explicitly.
    response = requests.get(url="https://cmr.earthdata.nasa.gov/search/granules.json",
                            params=set_params(params),
                            headers=headers,
                            timeout=timeout).json()
    return response
100+
101+
102+
# def get_granules(params: dict):
103+
# time_start = np.array([]).astype('datetime64[ns]')
104+
# response, headers = get_results(params=params)
105+
# # scroll = headers['CMR-Scroll-Id']
106+
# hits = int(headers['CMR-Hits'])
107+
# if hits==0:
108+
# raise Exception("No granules matched your input parameters.")
109+
# df = pd.read_csv(StringIO(response.text))
110+
# while hits > df.index.size:
111+
# # response, _ = get_results(params=params, headers={'CMR-Scroll-Id': scroll})
112+
# response, _ = get_results(params=params)
113+
# data = pd.read_csv(StringIO(response.text))
114+
# df = pd.concat([df, data])
115+
# return df
116+
117+
def get_granules(params: dict, ShortName: str, SingleDay_flag: bool):
    """Query CMR (JSON endpoint) for granule download URLs and sizes.

    Calls get_results repeatedly: CMR caps one query at 2000 entries, so
    when a full page comes back the 'temporal' search window in *params*
    is advanced (in place) to the day after the last returned granule and
    the query is re-issued.

    Parameters
    ----------
    params : dict
        CMR search parameters; the 'temporal' entry may be mutated to
        page through large result sets.
    ShortName : str
        ECCO dataset ShortName; 'MONTHLY'/'DAILY' in the name enables
        the single-day reduction below.
    SingleDay_flag : bool
        If True (and the dataset is MONTHLY/DAILY), reduce the results
        to the single granule whose start time is closest to StartDate.

    Returns
    -------
    (list, list)
        urls: .nc download URLs; sizes: granule sizes as reported by CMR
        ('granule_size', presumably in MB -- the caller scales by 2**20).

    Raises
    ------
    Exception
        If CMR returns an 'errors' payload, or a response with neither
        'feed' nor 'errors'.
    """
    time_start = np.array([]).astype('datetime64[ns]')
    urls = []
    sizes = []
    completed_query = False
    while not completed_query:
        response = get_results(params=params)
        if 'feed' in response:
            for curr_entry in response['feed']['entry']:
                time_start = np.append(time_start,
                                       np.datetime64(curr_entry['time_start'], 'ns'))
                sizes.append(curr_entry['granule_size'])
                # take the first link that points at a NetCDF file;
                # .get with a default because CMR link entries do not
                # always include a 'title' field
                for curr_link in curr_entry['links']:
                    if ".nc" in curr_link.get('title', '')[-3:]:
                        urls.append(curr_link['href'])
                        break
        elif 'errors' in response:
            raise Exception(response['errors'][0])
        else:
            # unexpected response shape: fail loudly here rather than
            # with an opaque KeyError at the pagination check below
            raise Exception('Unexpected CMR response: ' + str(response))

        # A short page means the query is exhausted; a full page (the
        # 2000-entry maximum) means there may be more, so re-query
        # starting the day after the last returned granule.
        # NOTE(review): assumes page_size=2000 is set in params -- confirm.
        if len(response['feed']['entry']) < 2000:
            completed_query = True
        else:
            params['temporal'] = str(np.datetime64(response['feed']['entry'][-1]['time_end'],'D')\
                                     + np.timedelta64(1,'D')) + params['temporal'][10:]

    # reduce granule list to single day if only one day in requested range
    if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
        if SingleDay_flag and (len(urls) > 1):
            # StartDate is resolved from enclosing/module scope -- TODO confirm
            day_index = np.argmin(np.abs(time_start - np.datetime64(StartDate, 'D')))
            urls = urls[day_index:(day_index + 1)]
            # keep sizes aligned with urls: the previous CSV-based version
            # sliced both columns of the granule table together; slicing
            # only urls left the two lists mismatched downstream
            sizes = sizes[day_index:(day_index + 1)]

    return urls, sizes
115150

116151

117152

@@ -148,48 +183,45 @@ def get_granules(params: dict):
148183
### Query CMR for the desired ECCO Dataset
149184

150185
# grans means 'granules', PO.DAAC's term for individual files in a dataset
151-
grans = get_granules(input_search_params)
152-
153-
154-
## Prepare results of query
155-
156-
# reduce granule list to single day if only one day in requested range
157-
if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
158-
if ((SingleDay_flag == True) and (len(grans['Granule UR']) > 1)):
159-
day_index = np.argmin(np.abs(np.asarray(grans['Start Time'])\
160-
.astype('datetime64[ns]') - np.datetime64(StartDate,'D')))
161-
grans = grans[day_index:(day_index+1)]
162-
163-
# convert the rows of the 'Online Access URLS' column to a Python list
164-
urls = grans['Online Access URLs'].tolist()
186+
urls,gran_sizes = get_granules(input_search_params,ShortName,SingleDay_flag)
187+
188+
# ## Prepare results of query
189+
#
190+
# # reduce granule list to single day if only one day in requested range
191+
# if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
192+
# if ((SingleDay_flag == True) and (len(grans['Granule UR']) > 1)):
193+
# day_index = np.argmin(np.abs(np.asarray(grans['Start Time'])\
194+
# .astype('datetime64[ns]') - np.datetime64(StartDate,'D')))
195+
# grans = grans[day_index:(day_index+1)]
196+
#
197+
# # convert the rows of the 'Online Access URLS' column to a Python list
198+
# urls = grans['Online Access URLs'].tolist()
165199

166200
# estimate granule sizes where this info is missing from CMR
167-
sizes = (2**20)*np.asarray(grans['Size']).astype('float64')
201+
sizes = (2**20)*np.asarray(gran_sizes).astype('float64')
168202
sizes = np.where(sizes > (2**10),sizes,np.nan)
169203
if np.sum(~np.isnan(sizes)) >= 1:
170204
sizes = np.where(~np.isnan(sizes),sizes,np.nanmean(sizes))
171205
else:
172206
input_search_params['temporal'] = ['1992-01-01','2017-12-31']
173-
grans_all = get_granules(input_search_params)
207+
_,gran_sizes_all = get_granules(input_search_params)
174208
sizes_all = (2**20)*np.asarray(grans_all['Size']).astype('float64')
175209
sizes_all = np.where(sizes_all > (2**10),sizes_all,np.nan)
176210
sizes = np.where(~np.isnan(sizes),sizes,np.nanmean(sizes_all))
177211
sizes = list(sizes)
178-
urls = grans['Online Access URLs'].tolist()
212+
# urls = grans['Online Access URLs'].tolist()
179213

180214
# for snapshot datasets with monthly snapshot_interval, only include snapshots at beginning/end of months
181215
if 'SNAPSHOT' in ShortName:
182216
if snapshot_interval == 'monthly':
183217
import re
184-
urls_list_copy = list(tuple(urls))
185-
sizes_list_copy = list(tuple(sizes))
186-
for idx,(url,size) in enumerate(zip(urls,sizes)):
218+
url_sizes_dict = {url:size for url,size in zip(urls,sizes)}
219+
for url,size in zip(urls,sizes):
187220
snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",url)[0][1:]
188221
if snapshot_date[8:] != '01':
189-
urls_list_copy.remove(url)
190-
del sizes_list_copy[idx]
191-
urls = urls_list_copy
192-
sizes = sizes_list_copy
222+
del url_sizes_dict[url]
223+
urls = list(url_sizes_dict.keys())
224+
sizes = list(url_sizes_dict.values())
193225

194226
return urls,sizes
195227

0 commit comments

Comments
 (0)