Skip to content

Commit 0bd672f

Browse files
authored
Merge pull request #92 from andrewdelman/ecco_access_updates
ecco_access updates to budget tutorials, and load data as dask arrays in mode = 's3_open_fsspec'
2 parents 9d28c60 + 7df1f0f commit 0bd672f

5 files changed

Lines changed: 1158 additions & 396 deletions

File tree

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Heat_budget_closure.ipynb

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
"# Global Heat Budget Closure\n",
88
"*Contributors*: [Jan-Erik Tesdal](https://github.com/jetesdal), [Ryan Abernathey](https://github.com/rabernat), [Ian Fenty](https://github.com/ifenty), and [Andrew Delman](https://github.com/andrewdelman)\n",
99
"\n",
10+
"Updated 2024-10-16\n",
11+
"\n",
1012
"A major part of this tutorial is based on \"*A Note on Practical Evaluation of Budgets in ECCO Version 4 Release 3*\" by Christopher G. Piecuch (https://ecco.jpl.nasa.gov/drive/files/Version4/Release3/doc/v4r3_budgets_howto.pdf). Calculation steps and Python code presented here are converted from the MATLAB code presented in the above reference."
1113
]
1214
},
@@ -78,15 +80,15 @@
7880
"\n",
7981
"## Datasets to download\n",
8082
"\n",
81-
"If you don't have any of the following datasets already, you will need to download them to complete the tutorial. Aside from the grid geometry file (which has no time dimension), you will need 2 monthly datasets for the full time span of ECCOv4r4 output (1992 through 2017). The ShortNames of the datasets are:\n",
83+
"Here are the ShortNames of the NASA Earthdata datasets that are needed for this tutorial:\n",
8284
"\n",
8385
"- **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4**\n",
8486
"- **ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4** (1993-2016)\n",
8587
"- **ECCO_L4_HEAT_FLUX_LLC0090GRID_MONTHLY_V4R4** (1993-2016)\n",
8688
"- **ECCO_L4_SSH_LLC0090GRID_SNAPSHOT_V4R4** (1993/1/1-2017/1/1, 1st of each month)\n",
8789
"- **ECCO_L4_TEMP_SALINITY_LLC0090GRID_SNAPSHOT_V4R4** (1993/1/1-2017/1/1, 1st of each month)\n",
8890
"\n",
89-
"If you haven't yet been through the [download tutorial](https://ecco-v4-python-tutorial.readthedocs.io/Downloading_ECCO_Datasets_from_PODAAC_Python.html) or used the [ecco_download module](https://ecco-v4-python-tutorial.readthedocs.io/Downloading_ECCO_Datasets_from_PODAAC_Python.html#ECCO_download-module:-the-quick-and-easy-method), it may help you to review that information before downloading the datasets. If you are downloading the snapshots, you may also find the [snaps_monthly_textlist](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Salt_and_salinity_budget.html#Load-monthly-snapshots) function helpful; it generates a file list of only the snapshot files for the 1st of each month, which you can then download using `wget`."
91+
"If you haven't yet [set up](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access) the `ecco_access` package in your path, you should do that before running this notebook. The `ecco_access.ecco_podaac_to_xrdataset` function used in the notebooks will handle the downloads or (in the AWS Cloud) direct access of the output, and open the data as an `xarray` dataset."
9092
]
9193
},
9294
{
@@ -118,8 +120,18 @@
118120
"from os.path import join,expanduser,exists,split\n",
119121
"user_home_dir = expanduser('~')\n",
120122
"\n",
121-
"# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
122-
"incloud_access = False"
123+
"# indicate mode of access\n",
124+
"# options are:\n",
125+
"# 'download': direct download from internet to your local machine\n",
126+
"# 'download_ifspace': like download, but only proceeds \n",
127+
"# if your machine has sufficient storage\n",
128+
"# 's3_open': access datasets in-cloud from an AWS instance\n",
129+
"# 's3_open_fsspec': use jsons generated with fsspec and \n",
130+
"# kerchunk libraries to speed up in-cloud access\n",
131+
"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
132+
"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
133+
"# has sufficient storage\n",
134+
"access_mode = 'download_ifspace'"
123135
]
124136
},
125137
{
@@ -206,9 +218,13 @@
206218
"## Set top-level file directory for the ECCO NetCDF files\n",
207219
"## =================================================================\n",
208220
"\n",
209-
"## currently set to ~/Downloads/ECCO_V4r4_PODAAC, \n",
221+
"## currently set to ~/ECCO_V4r4_PODAAC, \n",
210222
"## the default if ecco_podaac_download was used to download dataset granules\n",
211-
"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
223+
"ECCO_dir = join(user_home_dir,'ECCO_V4r4_PODAAC')\n",
224+
"\n",
225+
"# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
226+
"# # containing the jsons\n",
227+
"# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
212228
]
213229
},
214230
{
@@ -224,12 +240,14 @@
224240
" \"ECCO_L4_HEAT_FLUX_LLC0090GRID_MONTHLY_V4R4\",\\\n",
225241
" \"ECCO_L4_SSH_LLC0090GRID_SNAPSHOT_V4R4\",\\\n",
226242
" \"ECCO_L4_TEMP_SALINITY_LLC0090GRID_SNAPSHOT_V4R4\"]\n",
227-
"if incloud_access == True:\n",
228-
" from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
229-
" files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
230-
" StartDate='1993-01',EndDate='2016-12',\\\n",
231-
" max_avail_frac=0.5,\\\n",
232-
" download_root_dir=ECCO_dir)"
243+
"StartDate = '1993-01'\n",
244+
"EndDate = '2016-12'\n",
245+
"ds_dict = ea.ecco_podaac_to_xrdataset(ShortNames_list,\\\n",
246+
" StartDate=StartDate,EndDate=EndDate,\\\n",
247+
" snapshot_interval='monthly',\\\n",
248+
" mode=access_mode,\\\n",
249+
" download_root_dir=ECCO_dir,\\\n",
250+
" max_avail_frac=0.5)"
233251
]
234252
},
235253
{
@@ -259,10 +277,7 @@
259277
"outputs": [],
260278
"source": [
261279
"## Load the model grid\n",
262-
"if incloud_access == True:\n",
263-
" ecco_grid = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
264-
"else:\n",
265-
" ecco_grid = xr.open_dataset(glob.glob(join(ECCO_dir,'*GEOMETRY*','*.nc'))[0])"
280+
"ecco_grid = ds_dict[ShortNames_list[0]].compute()"
266281
]
267282
},
268283
{
@@ -300,16 +315,8 @@
300315
"year_end = 2016\n",
301316
"\n",
302317
"# open ETAN and THETA snapshots (beginning of each month)\n",
303-
"if incloud_access == True:\n",
304-
" ecco_monthly_SSH = xr.open_mfdataset(files_dict[ShortNames_list[-2]],\\\n",
305-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
306-
" ecco_monthly_TS = xr.open_mfdataset(files_dict[ShortNames_list[-1]],\\\n",
307-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
308-
"else:\n",
309-
" ecco_monthly_SSH = xr.open_mfdataset(join(ECCO_dir,'*SSH*SNAPSHOT*','*-01T*.nc'),\\\n",
310-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
311-
" ecco_monthly_TS = xr.open_mfdataset(join(ECCO_dir,'*TEMP_SALINITY*SNAPSHOT*','*-01T*.nc'),\\\n",
312-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
318+
"ecco_monthly_SSH = ds_dict[ShortNames_list[3]]\n",
319+
"ecco_monthly_TS = ds_dict[ShortNames_list[4]]\n",
313320
"ecco_monthly_snaps = xr.merge((ecco_monthly_SSH['ETAN'],ecco_monthly_TS['THETA']))\n",
314321
"\n",
315322
"# time mask for snapshots\n",
@@ -373,16 +380,10 @@
373380
"outputs": [],
374381
"source": [
375382
"## Open ECCO monthly mean variables\n",
376-
"if incloud_access == True:\n",
377-
" ecco_vars_int = xr.open_mfdataset(files_dict[ShortNames_list[1]],\\\n",
378-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
379-
" ecco_vars_sfc = xr.open_mfdataset(files_dict[ShortNames_list[2]],\\\n",
380-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
381-
"else:\n",
382-
" ecco_vars_int = xr.open_mfdataset(join(ECCO_dir,'*_OCEAN_3D_TEMPERATURE_FLUX*MONTHLY*','*.nc'),\\\n",
383-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
384-
" ecco_vars_sfc = xr.open_mfdataset(join(ECCO_dir,'*_HEAT_FLUX*MONTHLY*','*.nc'),\\\n",
385-
" data_vars='minimal',coords='minimal',compat='override',parallel=True)\n",
383+
"\n",
384+
"ecco_vars_int = ds_dict[ShortNames_list[1]]\n",
385+
"ecco_vars_sfc = ds_dict[ShortNames_list[2]]\n",
386+
"\n",
386387
"ecco_monthly_mean = xr.merge((ecco_vars_int,\\\n",
387388
" ecco_vars_sfc[['TFLUX','oceQsw']]))\n",
388389
"\n",
@@ -3291,7 +3292,7 @@
32913292
"name": "python",
32923293
"nbconvert_exporter": "python",
32933294
"pygments_lexer": "ipython3",
3294-
"version": "3.12.3"
3295+
"version": "3.11.9"
32953296
}
32963297
},
32973298
"nbformat": 4,

0 commit comments

Comments
 (0)