Skip to content

Commit 7332cdf

Browse files
committed
ecco_s3_retrieve workaround for when search results exceed 2000
1 parent a407556 commit 7332cdf

1 file changed

Lines changed: 28 additions & 16 deletions

File tree

ECCO-ACCESS/ecco_s3_retrieve.py

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -87,26 +87,36 @@ def get_results(params: dict, headers: dict=None):
8787
return response
8888

8989
def get_granules(params: dict, ShortName: str, SingleDay_flag: bool):
90-
response = get_results(params=params)
91-
if 'feed' in response.keys():
92-
time_start = np.array([]).astype('datetime64[ns]')
93-
s3_files_list = []
94-
for curr_entry in response['feed']['entry']:
95-
time_start = np.append(time_start,np.datetime64(curr_entry['time_start'],'ns'))
96-
for curr_link in curr_entry['links']:
97-
if "direct download access via S3" in curr_link['title']:
98-
s3_files_list.append(curr_link['href'])
99-
break
100-
elif 'errors' in response.keys():
101-
raise Exception(response['errors'][0])
90+
time_start = np.array([]).astype('datetime64[ns]')
91+
s3_files_list = []
92+
completed_query = False
93+
while completed_query == False:
94+
response = get_results(params=params)
95+
if 'feed' in response.keys():
96+
for curr_entry in response['feed']['entry']:
97+
time_start = np.append(time_start,np.datetime64(curr_entry['time_start'],'ns'))
98+
for curr_link in curr_entry['links']:
99+
if "direct download access via S3" in curr_link['title']:
100+
s3_files_list.append(curr_link['href'])
101+
break
102+
elif 'errors' in response.keys():
103+
raise Exception(response['errors'][0])
104+
105+
if len(response['feed']['entry']) < 2000:
106+
completed_query = True
107+
else:
108+
# do another CMR search since previous search hit the allowed maximum
109+
# number of entries (2000)
110+
params['temporal'] = str(np.datetime64(response['feed']['entry'][-1]['time_end'],'D')\
111+
+ np.timedelta64(1,'D'))+params['temporal'][10:]
102112

103113
# reduce granule list to single day if only one day in requested range
104114
if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
105115
if ((SingleDay_flag == True) and (len(s3_files_list) > 1)):
106116
day_index = np.argmin(np.abs(time_start - np.datetime64(StartDate,'D')))
107117
s3_files_list = s3_files_list[day_index:(day_index+1)]
108118

109-
return s3_files_list
119+
return s3_files_list
110120

111121

112122
# # Adjust StartDate and EndDate to CMR query values
@@ -508,8 +518,9 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5
508518

509519
pass
510520

511-
import shutil
512-
521+
import shutil
522+
523+
513524
# force max_avail_frac to be within limits [0,0.9]
514525
max_avail_frac = np.fmin(np.fmax(max_avail_frac,0),0.9)
515526

@@ -538,9 +549,10 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5
538549

539550
# for snapshot datasets with monthly snapshot_interval, only include snapshots at beginning/end of months
540551
if (('SNAPSHOT' in curr_shortname) and (snapshot_interval == 'monthly')):
552+
import re
541553
s3_files_list_copy = list(tuple(s3_files_list))
542554
for s3_file in s3_files_list:
543-
snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",url)[0][1:]
555+
snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",s3_file)[0][1:]
544556
if snapshot_date[8:] != '01':
545557
s3_files_list_copy.remove(s3_file)
546558
s3_files_list = s3_files_list_copy

0 commit comments

Comments (0)