Skip to content

Commit 3031837

Browse files
committed
Added s3_open_fsspec access mode
1 parent f9bff8f commit 3031837

4 files changed

Lines changed: 66 additions & 42 deletions

File tree

ecco_access/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from .ecco_download import ecco_podaac_download
88
from .ecco_download import ecco_podaac_download_subset
99

10+
from .ecco_s3_retrieve import setup_earthdata_login_auth
11+
from .ecco_s3_retrieve import init_S3FileSystem
1012
from .ecco_s3_retrieve import ecco_podaac_s3_query
1113
from .ecco_s3_retrieve import ecco_podaac_s3_open
1214
from .ecco_s3_retrieve import ecco_podaac_s3_open_fsspec

ecco_access/ecco_acc_dates.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def date_adjustment(ShortName,StartDate,EndDate,CMR_query=True):
4747
EndDate = str(np.datetime64(EndDate,'D') + np.timedelta64(1,'D'))
4848

4949
# CMR request adjustments
50-
if CMR_request:
50+
if CMR_query:
5151
SingleDay_flag = False
5252
if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
5353
if np.datetime64(EndDate,'D') - np.datetime64(StartDate,'D') \

ecco_access/ecco_access.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,7 @@ def ecco_podaac_access_to_xrdataset(query,version='v4r4',grid=None,time_res='all
392392
ds_out[shortname] = xr.open_dataset(access_out,engine='zarr',consolidated=False)
393393
if 'time' in ds_out[shortname].dims:
394394
# isolate time range specified
395-
startdate,enddate = date_adjustment(ShortName,\
395+
startdate,enddate = date_adjustment(shortname,\
396396
StartDate,EndDate,CMR_query=False)
397397
time_values = ds_out[shortname].time.values.astype('datetime64[D]')
398398
in_time_range = np.logical_and(time_values >= startdate,\

ecco_access/ecco_s3_retrieve.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
### This module contains routines to access and retrieve ECCO datasets on the AWS Cloud.
22
### These functions will only work when called from an AWS EC2 instance running in region us-west-2.
33

4-
54
from .ecco_acc_dates import date_adjustment
65

76
## Initialize Python libraries for module
@@ -12,6 +11,48 @@
1211
import os.path
1312
from os.path import basename, isfile, isdir, join, expanduser
1413
from pathlib import Path
14+
from platform import system
15+
from netrc import netrc
16+
from urllib import request
17+
from http.cookiejar import CookieJar
18+
from getpass import getpass
19+
import requests
20+
21+
22+
def setup_earthdata_login_auth(url: str='urs.earthdata.nasa.gov'):
23+
"""Helper subroutine to log into NASA EarthData"""
24+
25+
# Predict the path of the netrc file depending on os/platform type.
26+
_netrc = join(expanduser('~'), "_netrc" if system()=="Windows" else ".netrc")
27+
28+
# look for the netrc file and use the login/password
29+
try:
30+
username, _, password = netrc(file=_netrc).authenticators(url)
31+
32+
# if the file is not found, prompt the user for the login/password
33+
except (FileNotFoundError, TypeError):
34+
print('Please provide Earthdata Login credentials for access.')
35+
username, password = input('Username: '), getpass('Password: ')
36+
37+
# write credentials to netrc file
38+
with open(_netrc,'a') as file:
39+
lines = ["machine urs.earthdata.nasa.gov\n",\
40+
" login "+username+"\n",\
41+
" password "+password]
42+
file.writelines(lines)
43+
file.close()
44+
45+
manager = request.HTTPPasswordMgrWithDefaultRealm()
46+
manager.add_password(None, url, username, password)
47+
auth = request.HTTPBasicAuthHandler(manager)
48+
jar = CookieJar()
49+
processor = request.HTTPCookieProcessor(jar)
50+
opener = request.build_opener(auth, processor)
51+
request.install_opener(opener)
52+
53+
54+
55+
###================================================================================================================
1556

1657

1758
def ecco_podaac_s3_query(ShortName,StartDate,EndDate):
@@ -42,41 +83,9 @@ def ecco_podaac_s3_query(ShortName,StartDate,EndDate):
4283

4384
pass
4485

45-
## Initialize Python libraries
46-
from platform import system
47-
from netrc import netrc
48-
from os.path import basename, isfile, isdir, join, expanduser
49-
from urllib import request
50-
from http.cookiejar import CookieJar
51-
52-
53-
# Predict the path of the netrc file depending on os/platform type.
54-
_netrc = join(expanduser('~'), "_netrc" if system()=="Windows" else ".netrc")
55-
5686

5787
## Define Helper Subroutines
5888

59-
### Helper subroutine to log into NASA EarthData
60-
61-
# not pretty but it works
62-
def setup_earthdata_login_auth(url: str='urs.earthdata.nasa.gov'):
63-
# look for the netrc file and use the login/password
64-
try:
65-
username, _, password = netrc(file=_netrc).authenticators(url)
66-
67-
# if the file is not found, prompt the user for the login/password
68-
except (FileNotFoundError, TypeError):
69-
print('Please provide Earthdata Login credentials for access.')
70-
username, password = input('Username: '), getpass('Password: ')
71-
72-
manager = request.HTTPPasswordMgrWithDefaultRealm()
73-
manager.add_password(None, url, username, password)
74-
auth = request.HTTPBasicAuthHandler(manager)
75-
jar = CookieJar()
76-
processor = request.HTTPCookieProcessor(jar)
77-
opener = request.build_opener(auth, processor)
78-
request.install_opener(opener)
79-
8089
### Helper subroutines to make the API calls to search CMR and parse response
8190
def set_params(params: dict):
8291
params.update({'scroll': "true", 'page_size': 2000})
@@ -127,8 +136,6 @@ def get_granules(params: dict, ShortName: str, SingleDay_flag: bool):
127136
StartDate,EndDate,CMR_query=True)
128137

129138
## Log into Earthdata using your username and password
130-
131-
# actually log in with this command:
132139
setup_earthdata_login_auth()
133140

134141
# Query the NASA Common Metadata Repository to find the URL of every granule associated with the desired
@@ -167,7 +174,6 @@ def init_S3FileSystem():
167174
168175
"""
169176

170-
import requests
171177
import s3fs
172178

173179
creds = requests.get('https://archive.podaac.earthdata.nasa.gov/s3credentials').json()
@@ -372,30 +378,46 @@ def ecco_podaac_s3_open_fsspec(ShortName,jsons_root_dir):
372378

373379
pass
374380

381+
import glob
375382
import fsspec
376383

377384

378385
# identify where json file is found
379386
shortname_split = ShortName.split('_')
380-
gridtype = shortname_split[-3]
381387
if 'GEOMETRY' in ShortName:
388+
gridtype = shortname_split[-2]
382389
time_res = 'GEOMETRY'
383390
elif 'MIX_COEFFS' in ShortName:
391+
gridtype = shortname_split[-2]
384392
time_res = 'MIXING_COEFFS'
385393
else:
394+
gridtype = shortname_split[-3]
386395
time_res = shortname_split[-2]
387396
json_subdir = join(jsons_root_dir,"_".join(['MZZ',gridtype,time_res]))
388-
json_file = join(json_subdir,ShortName+'.json')
397+
if (('GEOMETRY' in ShortName) or ('MIX_COEFFS' in ShortName)):
398+
if 'LLC' in gridtype:
399+
json_file = glob.glob(join(json_subdir,'*native*.json'))[0]
400+
elif 'DEG' in gridtype:
401+
json_file = glob.glob(join(json_subdir,'*latlon*.json'))[0]
402+
else:
403+
json_file = join(json_subdir,ShortName+'.json')
404+
405+
406+
# get NASA Earthdata credentials for S3
407+
creds = requests.get('https://archive.podaac.earthdata.nasa.gov/s3credentials').json()
389408

390409
# generate map object
391410
fs = fsspec.filesystem(\
392411
"reference",
393412
fo=json_file,\
394413
remote_protocol="s3",
395-
remote_options={"anon":True},
414+
remote_options={"anon":False,\
415+
"key":creds['accessKeyId'],
416+
"secret":creds['secretAccessKey'],
417+
"token":creds['sessionToken']},\
396418
skip_instance_cache=True)
397419
fsmap_obj = fs.get_mapper("")
398-
420+
399421
return fsmap_obj
400422

401423

0 commit comments

Comments
 (0)