99
1010from .ecco_s3_retrieve import ecco_podaac_s3_query
1111from .ecco_s3_retrieve import ecco_podaac_s3_open
12+ from .ecco_s3_retrieve import ecco_podaac_s3_open_fsspec
1213from .ecco_s3_retrieve import ecco_podaac_s3_get
1314from .ecco_s3_retrieve import ecco_podaac_s3_get_diskaware
1415
1516
16-
1717def ecco_podaac_access (query ,version = 'v4r4' ,grid = None ,time_res = 'all' ,\
1818 StartDate = None ,EndDate = None ,\
1919 mode = 'download_ifspace' ,download_root_dir = None ,** kwargs ):
@@ -54,7 +54,7 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
5454 StartDate,EndDate: str, in 'YYYY', 'YYYY-MM', or 'YYYY-MM-DD' format,
5555 define date range [StartDate,EndDate] for download.
5656 EndDate is included in the time range (unlike typical Python ranges).
57- ECCOv4r4 date range is '1992-01-01' to '2017-12-31'.
57+ Full ECCOv4r4 date range (default) is '1992-01-01' to '2017-12-31'.
5858 For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets
5959 within the specified date range.
6060
@@ -72,13 +72,13 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
7272 to see keyword arguments that can be used in this mode.
7373 The following modes work within the AWS cloud only:
7474 's3_open': Access datasets on S3 without downloading.
75+ 's3_open_fsspec': Use json files (generated with `fsspec` and `kerchunk`)
76+ for expedited opening of datasets.
7577 's3_get': Download from S3 (to AWS EC2 instance).
7678 's3_get_ifspace': Check storage availability before downloading;
7779 download if storage footprint
7880 <= max_avail_frac*(available storage).
7981 Otherwise data are opened "remotely" from S3 bucket.
80- 's3_fsspec': Use `fsspec` json files (generated with `kerchunk`)
81- for expedited loading of datasets.
8282
8383 download_root_dir: str, defines parent directory to download files to.
8484 Files will be downloaded to directory download_root_dir/ShortName/.
@@ -95,6 +95,15 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
9595 Valid range is [0,0.9]. If the number provided is outside this range, it is replaced by the closer
9696 endpoint of the range.
9797
98+ jsons_root_dir: str, for s3_open_fsspec mode only, the root/parent directory where the
99+ fsspec/kerchunk-generated json files are found.
100+ The json files are generated using the steps described here:
101+ https://medium.com/pangeo/fake-it-until-you-make-it-reading-goes-netcdf4-data-on-aws-s3-as-zarr
102+ -for-rapid-data-access-61e33f8fe685
103+ and stored as {jsons_root_dir}/MZZ_{GRIDTYPE}_{TIME_RES}/{SHORTNAME}.json.
104+ For v4r4, GRIDTYPE is '05DEG' or 'LLC0090GRID'.
105+ TIME_RES is one of: ('MONTHLY','DAILY','SNAPSHOT','GEOMETRY','MIXING_COEFFS').
106+
98107 n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6.
99108
100109 force_redownload: bool, if True, existing files will be redownloaded and replaced;
@@ -153,14 +162,22 @@ def shortnames_find(query_list,grid,time_res):
153162
154163 possible_mode_list = "['ls','query','s3_ls','s3_query','download',\n " \
155164 + "'download_ifspace','download_subset',\n " \
156- + "'s3_open','s3_get','s3_get_ifspace','s3_fsspec ']"
165+ + "'s3_open','s3_get','s3_get_ifspace','s3_open_fsspec ']"
157166
158167 # set some default keyword arguments
159168 if (('n_workers' not in kwargs .keys ()) and (mode != 'download_subset' )):
160169 kwargs ['n_workers' ] = 6
161170 if 'force_redownload' not in kwargs .keys ():
162171 kwargs ['force_redownload' ] = False
163-
172+
173+ # remove unneeded keyword arguments
174+ if mode == 's3_open_fsspec' :
175+ for kwarg in list (kwargs .keys ()):
176+ if kwarg != 'jsons_root_dir' :
177+ del kwargs [kwarg ]
178+ else :
179+ if 'jsons_root_dir' in kwargs .keys ():
180+ del kwargs ['jsons_root_dir' ]
164181
165182 # download or otherwise access granules, depending on mode
166183
@@ -206,14 +223,16 @@ def shortnames_find(query_list,grid,time_res):
206223 elif mode == 's3_open' :
207224 granule_files [shortname ] = ecco_podaac_s3_open (\
208225 shortname ,StartDate ,EndDate )
226+ elif mode == 's3_open_fsspec' :
227+ # granule_files will consist of mapper objects rather than URL/path or file lists
228+ granule_files [shortname ] = ecco_podaac_s3_open_fsspec (\
229+ shortname ,** kwargs )
209230 elif mode == 's3_get' :
210231 kwargs ['return_downloaded_files' ] = True
211232 granule_files [shortname ] = ecco_podaac_s3_get (\
212233 shortname ,StartDate ,EndDate ,\
213234 download_root_dir = download_root_dir ,\
214235 ** kwargs )
215- elif mode == 's3_fsspec' :
216- print ('Placeholder for jsons' )
217236 else :
218237 raise ValueError ('Invalid mode specified; please specify one of the following:' \
219238 + '\n ' + possible_mode_list )
@@ -225,7 +244,7 @@ def shortnames_find(query_list,grid,time_res):
225244 return_granules = True
226245 if return_granules :
227246 for shortname in granule_files .keys ():
228- if len (granule_files [shortname ]) == 1 :
247+ if (( len (granule_files [shortname ]) == 1 ) and ( mode != 's3_open_fsspec' )) :
229248 # if only 1 file is downloaded, return its filename as a string instead of a list
230249 granule_files [shortname ] = granule_files [shortname ][0 ]
231250
0 commit comments