Skip to content

Commit b9bc354

Browse files
authored
Merge pull request #94 from andrewdelman/ecco_access_updates
Correction to ecco_access handling of snapshots in download modes
2 parents e02a8a3 + 800caf2 commit b9bc354

4 files changed

Lines changed: 81 additions & 45 deletions

File tree

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Heat_budget_closure.ipynb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,12 @@
116116
"import os\n",
117117
"import sys\n",
118118
"import glob\n",
119+
"import ecco_access as ea\n",
119120
"\n",
120121
"from os.path import join,expanduser,exists,split\n",
121122
"user_home_dir = expanduser('~')\n",
122123
"\n",
124+
"\n",
123125
"# indicate mode of access\n",
124126
"# options are:\n",
125127
"# 'download': direct download from internet to your local machine\n",
@@ -131,7 +133,7 @@
131133
"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
132134
"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
133135
"# has sufficient storage\n",
134-
"access_mode = 'download_ifspace'"
136+
"access_mode = 'query'"
135137
]
136138
},
137139
{
@@ -233,7 +235,7 @@
233235
"metadata": {},
234236
"outputs": [],
235237
"source": [
236-
"## if working in the AWS cloud, access datasets needed for this tutorial\n",
238+
"## access datasets needed for this tutorial\n",
237239
"\n",
238240
"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
239241
" \"ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4\",\\\n",

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Salt_and_salinity_budget.ipynb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@
164164
"import glob\n",
165165
"import psutil\n",
166166
"import os\n",
167+
"import ecco_access as ea\n",
167168
"\n",
168169
"from os.path import expanduser,join\n",
169170
"user_home_dir = expanduser('~')\n",
@@ -292,7 +293,7 @@
292293
"metadata": {},
293294
"outputs": [],
294295
"source": [
295-
"## if working in the AWS cloud, access datasets needed for this tutorial\n",
296+
"## access datasets needed for this tutorial\n",
296297
"\n",
297298
"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
298299
" \"ECCO_L4_FRESH_FLUX_LLC0090GRID_MONTHLY_V4R4\",\\\n",
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../ecco_access

ecco_access/ecco_download.py

Lines changed: 74 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -93,25 +93,60 @@ def set_params(params: dict):
9393
return {par: val for par, val in params.items() if val is not None}
9494

9595
def get_results(params: dict, headers: dict=None):
    """Submit a granule search to NASA's CMR and return the decoded JSON body.

    Parameters
    ----------
    params : dict
        Raw CMR search parameters; entries whose value is None are dropped
        by set_params before the request is sent.
    headers : dict, optional
        Extra HTTP headers to pass along with the request.

    Returns
    -------
    dict
        The parsed JSON response from the CMR granule search endpoint.
    """
    search_endpoint = "https://cmr.earthdata.nasa.gov/search/granules.json"
    reply = requests.get(url=search_endpoint,
                         params=set_params(params),
                         headers=headers)
    return reply.json()
def get_granules(params: dict, ShortName: str, SingleDay_flag: bool, StartDate=None):
    """
    Query CMR for all granules matching params, paging through results.

    Parameters
    ----------
    params : dict
        CMR search parameters; params['temporal'] is read and rewritten
        in place when paging past CMR's 2000-entry maximum.
    ShortName : str
        ECCO dataset ShortName; used to detect MONTHLY/DAILY datasets.
    SingleDay_flag : bool
        If True (and the dataset is MONTHLY/DAILY), reduce the results to
        the single granule whose start time is closest to StartDate.
    StartDate : str or None, optional
        Date ('YYYY-MM-DD') for the single-day reduction. Required when that
        reduction is requested. (Previously this name was read from an
        enclosing scope, which raised NameError at module level.)

    Returns
    -------
    urls, sizes : list
        Download URLs of the .nc granules and the corresponding granule
        sizes (in MB, as reported by CMR), kept in lockstep.

    Raises
    ------
    Exception
        If CMR reports errors or returns an unrecognized payload.
    ValueError
        If the single-day reduction is requested without a StartDate.
    """
    time_start = np.array([]).astype('datetime64[ns]')
    urls = []
    sizes = []
    completed_query = False
    while not completed_query:
        response = get_results(params=params)
        if 'errors' in response.keys():
            raise Exception(response['errors'][0])
        elif 'feed' not in response.keys():
            # neither granule feed nor error list: fail loudly instead of
            # hitting a bare KeyError below
            raise Exception('Unexpected CMR response: ' + str(response))
        entries = response['feed']['entry']
        for curr_entry in entries:
            time_start = np.append(time_start,
                                   np.datetime64(curr_entry['time_start'], 'ns'))
            sizes.append(curr_entry['granule_size'])
            for curr_link in curr_entry['links']:
                # keep only the NetCDF download link for each granule
                if ".nc" in curr_link['title'][-3:]:
                    urls.append(curr_link['href'])
                    break
        if len(entries) < 2000:
            completed_query = True
        else:
            # do another CMR search since the previous search hit the allowed
            # maximum number of entries (2000): resume the day after the last
            # returned granule's end time
            params['temporal'] = str(np.datetime64(entries[-1]['time_end'], 'D')
                                     + np.timedelta64(1, 'D')) + params['temporal'][10:]

    # reduce granule list to a single day if only one day in requested range
    if ('MONTHLY' in ShortName) or ('DAILY' in ShortName):
        if SingleDay_flag and (len(urls) > 1):
            if StartDate is None:
                raise ValueError('StartDate is required when SingleDay_flag=True '
                                 'for MONTHLY/DAILY datasets')
            day_index = int(np.argmin(np.abs(time_start - np.datetime64(StartDate, 'D'))))
            urls = urls[day_index:(day_index + 1)]
            # keep sizes aligned with urls (previously only urls was trimmed,
            # leaving the two parallel lists with different lengths)
            sizes = sizes[day_index:(day_index + 1)]

    return urls, sizes
115150

116151

117152

@@ -148,48 +183,45 @@ def get_granules(params: dict):
148183
### Query CMR for the desired ECCO Dataset
149184

150185
# grans means 'granules', PO.DAAC's term for individual files in a dataset
151-
grans = get_granules(input_search_params)
152-
153-
154-
## Prepare results of query
155-
156-
# reduce granule list to single day if only one day in requested range
157-
if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
158-
if ((SingleDay_flag == True) and (len(grans['Granule UR']) > 1)):
159-
day_index = np.argmin(np.abs(np.asarray(grans['Start Time'])\
160-
.astype('datetime64[ns]') - np.datetime64(StartDate,'D')))
161-
grans = grans[day_index:(day_index+1)]
162-
163-
# convert the rows of the 'Online Access URLS' column to a Python list
164-
urls = grans['Online Access URLs'].tolist()
186+
urls,gran_sizes = get_granules(input_search_params,ShortName,SingleDay_flag)
187+
188+
# ## Prepare results of query
189+
#
190+
# # reduce granule list to single day if only one day in requested range
191+
# if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
192+
# if ((SingleDay_flag == True) and (len(grans['Granule UR']) > 1)):
193+
# day_index = np.argmin(np.abs(np.asarray(grans['Start Time'])\
194+
# .astype('datetime64[ns]') - np.datetime64(StartDate,'D')))
195+
# grans = grans[day_index:(day_index+1)]
196+
#
197+
# # convert the rows of the 'Online Access URLS' column to a Python list
198+
# urls = grans['Online Access URLs'].tolist()
165199

166200
# estimate granule sizes where this info is missing from CMR
167-
sizes = (2**20)*np.asarray(grans['Size']).astype('float64')
201+
sizes = (2**20)*np.asarray(gran_sizes).astype('float64')
168202
sizes = np.where(sizes > (2**10),sizes,np.nan)
169203
if np.sum(~np.isnan(sizes)) >= 1:
170204
sizes = np.where(~np.isnan(sizes),sizes,np.nanmean(sizes))
171205
else:
172206
input_search_params['temporal'] = ['1992-01-01','2017-12-31']
173-
grans_all = get_granules(input_search_params)
207+
_,gran_sizes_all = get_granules(input_search_params)
174208
sizes_all = (2**20)*np.asarray(grans_all['Size']).astype('float64')
175209
sizes_all = np.where(sizes_all > (2**10),sizes_all,np.nan)
176210
sizes = np.where(~np.isnan(sizes),sizes,np.nanmean(sizes_all))
177211
sizes = list(sizes)
178-
urls = grans['Online Access URLs'].tolist()
212+
# urls = grans['Online Access URLs'].tolist()
179213

180214
# for snapshot datasets with monthly snapshot_interval, only include snapshots at beginning/end of months
181215
if 'SNAPSHOT' in ShortName:
182216
if snapshot_interval == 'monthly':
183217
import re
184-
urls_list_copy = list(tuple(urls))
185-
sizes_list_copy = list(tuple(sizes))
186-
for idx,(url,size) in enumerate(zip(urls,sizes)):
218+
url_sizes_dict = {url:size for url,size in zip(urls,sizes)}
219+
for url,size in zip(urls,sizes):
187220
snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",url)[0][1:]
188221
if snapshot_date[8:] != '01':
189-
urls_list_copy.remove(url)
190-
del sizes_list_copy[idx]
191-
urls = urls_list_copy
192-
sizes = sizes_list_copy
222+
del url_sizes_dict[url]
223+
urls = list(url_sizes_dict.keys())
224+
sizes = list(url_sizes_dict.values())
193225

194226
return urls,sizes
195227

0 commit comments

Comments
 (0)