Skip to content

Commit 46cee36

Browse files
authored
Merge pull request #67 from andrewdelman/subset_download
New function to download subsets in ecco_download.py, and associated tutorial
2 parents 9400d16 + f2bfb85 commit 46cee36

9 files changed

Lines changed: 5485 additions & 236 deletions

ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Downloading_ECCO_Subsets.ipynb

Lines changed: 4472 additions & 0 deletions
Large diffs are not rendered by default.

ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Jupyter_Notebook_Downloading_ECCO_Datasets_from_PODAAC.ipynb

Lines changed: 91 additions & 61 deletions
Large diffs are not rendered by default.

ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/ecco_download.py

Lines changed: 785 additions & 28 deletions
Large diffs are not rendered by default.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#!/bin/bash
2+
3+
# Andrew Delman, December 2023
4+
#
5+
# This script takes PO.DAAC Opendap URLs listed in a text file
6+
# and downloads them to the current directory, or another directory
7+
# specified by option -P.
8+
# For example:
9+
# ./wget_download_fromlist.sh -i urls_download.txt \
10+
# -P /ECCOv4_downloads/ -n Caribbean \
11+
# -u username -p password
12+
# downloads the files from URLs listed in ./urls_download.txt,
13+
# to the directory /ECCOv4_downloads/, and appends the
14+
# identifier 'Caribbean' to each of the downloaded file names.
15+
# Input options can be specified either using the -i -P -n -u -p tags
16+
# shown above, or sequentially in that order without the tags.
17+
# However, option and positional/sequential inputs can not be combined
18+
# when this script is called.
19+
#
20+
# Note: if NASA Earthdata user authentication is already stored in
21+
# the user's .netrc file, then -u and -p can be omitted,
22+
# and storing the authentication in .netrc is recommended for frequent users.
23+
# See https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+cURL+And+Wget
24+
# for a step-by-step guide to set this up.
25+
26+
27+
# default arguments
download_dir="./"
append_id=""
# initialise credentials so stray environment variables of the same name
# are never picked up by accident
username=""
password=""

# positional argument support
#
# assign_positional_args ARGS...
#   When the script is called without option tags, the arguments are taken
#   sequentially as: URL list file, download directory, identifier,
#   username, password.  This function assigns arguments 2-5 to the global
#   variables download_dir, append_id, username and password.
#   - Does nothing if there are no arguments or if the first argument is an
#     option tag (starts with "-"), so option-style calls such as
#     "-P dir -i list.txt" are not misread as positional input.
#   - An omitted or empty directory keeps the current default instead of
#     replacing it with an empty string.
assign_positional_args() {
  if [[ $# -eq 0 || "$1" == -* ]]; then
    return 0
  fi
  download_dir="${2:-$download_dir}"
  append_id="${3-}"
  username="${4-}"
  password="${5-}"
}
assign_positional_args "$@"
38+
39+
# if input options given, assign to string variables
#
# parse_options ARGS...
#   Parses the option tags -i -P -n -u -p (each takes a value) and assigns
#   the global variables url_listfile, download_dir, append_id, username
#   and password.  getopts stops at the first non-option argument, so a
#   purely positional call leaves every variable untouched.
#   Exits with status 2 if an option is unknown or is missing its value,
#   rather than silently ignoring it.
parse_options() {
  local option
  # keep the getopts cursor private so the function can be called again
  local OPTIND=1
  while getopts ":i:P:n:u:p:" option; do
    case "$option" in
      i) # text file specifying URLs to download
        url_listfile="$OPTARG" ;;
      P) # directory (with path) to download files to
        download_dir="$OPTARG" ;;
      n) # identifier to append to file names
        append_id="$OPTARG" ;;
      u) # Earthdata username
        username="$OPTARG" ;;
      p) # Earthdata password
        password="$OPTARG" ;;
      :) # the leading ":" in the optstring reports a missing value here
        echo "Error: option -$OPTARG requires an argument." >&2
        exit 2 ;;
      \?)
        echo "Error: unknown option -$OPTARG." >&2
        exit 2 ;;
    esac
  done
}
parse_options "$@"
54+
55+
# if no input arguments supplied, return error message and exit
# (checked first, and with a non-zero status so callers can detect failure;
# a bare "exit" after echo would report success)
if [[ $# -eq 0 ]]; then
  echo "Error: no URL file list supplied. No files downloaded." >&2
  exit 1
fi

# if -i option not supplied then assume $1 is URL file list
if [[ -z "${url_listfile+x}" ]]; then
  url_listfile="$1"
fi

# stop before creating directories or contacting the server if the list
# of URLs cannot be read
if [[ ! -r "$url_listfile" ]]; then
  echo "Error: URL file list '$url_listfile' not found or not readable. No files downloaded." >&2
  exit 1
fi
65+
66+
# create download directory if it does not already exist
# An empty or unset directory falls back to the current directory, so
# later path building can never resolve to the filesystem root.
download_dir="${download_dir:-./}"
if ! mkdir -p -- "$download_dir"; then
  echo "Error: could not create download directory '$download_dir'." >&2
  exit 1
fi
68+
69+
70+
# read URLs from URL text file and download to $download_dir,
# with file names as the name of the granule plus $append_id

# granule_filename URL [APPEND_ID]
#   Prints the local file name for one URL:
#   - everything up to and including ".../granules/" is removed;
#   - if the text after ".dap." starts with "nc", the ".dap.nc..." tail
#     (including any query string) is replaced by "_<APPEND_ID>.nc";
#   - otherwise the remaining name is kept unchanged and a warning is
#     written to stderr (stderr so it cannot leak into the captured name).
granule_filename() {
  local url=$1
  local id=${2-}
  local no_paths=${url##*/granules/}
  local after_dap=${no_paths#*.dap.}
  local name=${no_paths%.dap.nc*}
  if [[ "${after_dap:0:2}" == "nc" ]]; then
    name="${name}_${id}.nc"
  else
    echo "Downloaded file type uncertain; may not be NetCDF" >&2
  fi
  printf '%s\n' "$name"
}

# "|| [[ -n $line ]]" keeps the last URL when the list has no trailing newline
while IFS= read -r line || [[ -n "$line" ]]; do
  line=${line%$'\r'}            # tolerate Windows (CRLF) line endings
  [[ -n "$line" ]] || continue  # skip blank lines

  filename=$(granule_filename "$line" "${append_id-}")

  # join directory and file name with exactly one separating "/";
  # an empty directory means the current directory, never "/"
  target_dir=${download_dir:-.}
  path_filename="${target_dir%/}/${filename}"

  # build the command as an array so paths and URLs containing spaces or
  # glob characters (e.g. "?" in OPeNDAP query strings) stay single words
  wget_args=(-nv -nc -c -O "$path_filename")
  if [[ -n "${username-}" && -n "${password-}" ]]; then
    # NOTE: credentials given here are passed on wget's command line;
    # storing them in .netrc avoids that
    wget_args+=(--user="$username" --password="$password")
  fi
  wget "${wget_args[@]}" "$line"
done < "$url_listfile"
777 KB
Binary file not shown.

Intro_to_PO_Tutorials/Geostrophic_balance.ipynb

Lines changed: 38 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"metadata": {},
66
"source": [
77
"# Part 1: Geostrophic balance\n",
8-
"Andrew Delman, updated 2023-01-23.\n",
8+
"Andrew Delman, updated 2023-12-22.\n",
99
"\n",
1010
"## Objectives\n",
1111
"\n",
@@ -118,21 +118,26 @@
118118
"Help on function ecco_podaac_download in module ecco_download:\n",
119119
"\n",
120120
"ecco_podaac_download(ShortName, StartDate, EndDate, download_root_dir=None, n_workers=6, force_redownload=False)\n",
121-
" This routine downloads ECCO datasets from PO.DAAC. It is adapted from the Jupyter notebooks created by Jack McNelis and Ian Fenty (https://github.com/ECCO-GROUP/ECCO-ACCESS/blob/master/PODAAC/Downloading_ECCO_datasets_from_PODAAC/README.md) and modified by Andrew Delman (https://ecco-v4-python-tutorial.readthedocs.io).\n",
121+
" This routine downloads ECCO datasets from PO.DAAC. It is adapted from the Jupyter notebooks \n",
122+
" created by Jack McNelis and Ian Fenty (https://github.com/ECCO-GROUP/ECCO-ACCESS/blob/master/PODAAC/Downloading_ECCO_datasets_from_PODAAC/README.md)\n",
123+
" and modified by Andrew Delman (https://ecco-v4-python-tutorial.readthedocs.io).\n",
122124
" \n",
123125
" Parameters\n",
124126
" ----------\n",
125-
" ShortName: the ShortName of the dataset (can be identified from https://search.earthdata.nasa.gov/search?fpj=ECCO, selecting the \"i\" information button and the ShortName will appear in a gray box in the upper-left corner)\n",
126127
" \n",
127-
" StartDate: the start of the time range to be downloaded, expressed in the format \"YYYY-MM-DD\"\n",
128+
" ShortName: str, the ShortName that identifies the dataset on PO.DAAC.\n",
128129
" \n",
129-
" EndDate: the end of the time range to be downloaded, expressed in the format \"YYYY-MM-DD\"\n",
130+
" StartDate,EndDate: str, in 'YYYY', 'YYYY-MM', or 'YYYY-MM-DD' format, \n",
131+
" define date range [StartDate,EndDate] for download.\n",
132+
" EndDate is included in the time range (unlike typical Python ranges).\n",
133+
" ECCOv4r4 date range is '1992-01-01' to '2017-12-31'.\n",
134+
" For 'SNAPSHOT' datasets, an additional day is added to EndDate to enable closed budgets\n",
135+
" within the specified date range.\n",
130136
" \n",
131-
" download_root_dir: path of the parent directory to download ECCO files\n",
137+
" n_workers: int, number of workers to use in concurrent downloads.\n",
132138
" \n",
133-
" n_workers: number of workers to use in concurrent downloads\n",
134-
" \n",
135-
" force_redownload: if True, existing files will be redownloaded and replaced; if False, existing files will not be replaced\n",
139+
" force_redownload: bool, if True, existing files will be redownloaded and replaced;\n",
140+
" if False, existing files will not be replaced.\n",
136141
"\n"
137142
]
138143
}
@@ -158,33 +163,14 @@
158163
"output_type": "stream",
159164
"text": [
160165
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_OCEAN_VEL_LLC0090GRID_MONTHLY_V4R4\n",
161-
"{'ShortName': 'ECCO_L4_OCEAN_VEL_LLC0090GRID_MONTHLY_V4R4', 'temporal': '2000-01-02,2000-01-31'}\n",
162166
"\n",
163-
"Total number of matching granules: 1\n"
164-
]
165-
},
166-
{
167-
"name": "stderr",
168-
"output_type": "stream",
169-
"text": [
170-
"100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:08<00:00, 8.11s/it]"
171-
]
172-
},
173-
{
174-
"name": "stdout",
175-
"output_type": "stream",
176-
"text": [
167+
"Total number of matching granules: 1\n",
168+
"DL Progress: 100%|###########################| 1/1 [00:06<00:00, 6.01s/it]\n",
177169
"\n",
178170
"=====================================\n",
179171
"total downloaded: 30.6 Mb\n",
180-
"avg download speed: 3.76 Mb/s\n"
181-
]
182-
},
183-
{
184-
"name": "stderr",
185-
"output_type": "stream",
186-
"text": [
187-
"\n"
172+
"avg download speed: 5.07 Mb/s\n",
173+
"Time spent = 6.032305955886841 seconds\n"
188174
]
189175
}
190176
],
@@ -193,7 +179,7 @@
193179
"# to default path ~/Downloads/ECCO_V4r4_PODAAC/\n",
194180
"vel_monthly_shortname = \"ECCO_L4_OCEAN_VEL_LLC0090GRID_MONTHLY_V4R4\"\n",
195181
"ecco_podaac_download(ShortName=vel_monthly_shortname,\\\n",
196-
" StartDate=\"2000-01-02\",EndDate=\"2000-01-31\",download_root_dir=None,\\\n",
182+
" StartDate=\"2000-01-01\",EndDate=\"2000-01-31\",download_root_dir=None,\\\n",
197183
" n_workers=6,force_redownload=False)"
198184
]
199185
},
@@ -207,33 +193,14 @@
207193
"output_type": "stream",
208194
"text": [
209195
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_DENS_STRAT_PRESS_LLC0090GRID_MONTHLY_V4R4\n",
210-
"{'ShortName': 'ECCO_L4_DENS_STRAT_PRESS_LLC0090GRID_MONTHLY_V4R4', 'temporal': '2000-01-02,2000-01-31'}\n",
211196
"\n",
212-
"Total number of matching granules: 1\n"
213-
]
214-
},
215-
{
216-
"name": "stderr",
217-
"output_type": "stream",
218-
"text": [
219-
"100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00, 6.12s/it]"
220-
]
221-
},
222-
{
223-
"name": "stdout",
224-
"output_type": "stream",
225-
"text": [
197+
"Total number of matching granules: 1\n",
198+
"DL Progress: 100%|###########################| 1/1 [00:07<00:00, 7.11s/it]\n",
226199
"\n",
227200
"=====================================\n",
228201
"total downloaded: 30.98 Mb\n",
229-
"avg download speed: 5.05 Mb/s\n"
230-
]
231-
},
232-
{
233-
"name": "stderr",
234-
"output_type": "stream",
235-
"text": [
236-
"\n"
202+
"avg download speed: 4.36 Mb/s\n",
203+
"Time spent = 7.113083362579346 seconds\n"
237204
]
238205
}
239206
],
@@ -242,7 +209,7 @@
242209
"# to default path ~/Downloads/ECCO_V4r4_PODAAC/\n",
243210
"denspress_monthly_shortname = \"ECCO_L4_DENS_STRAT_PRESS_LLC0090GRID_MONTHLY_V4R4\"\n",
244211
"ecco_podaac_download(ShortName=denspress_monthly_shortname,\\\n",
245-
" StartDate=\"2000-01-02\",EndDate=\"2000-01-31\",download_root_dir=None,\\\n",
212+
" StartDate=\"2000-01-01\",EndDate=\"2000-01-31\",download_root_dir=None,\\\n",
246213
" n_workers=6,force_redownload=False)"
247214
]
248215
},
@@ -265,54 +232,23 @@
265232
"output_type": "stream",
266233
"text": [
267234
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4\n",
268-
"{'ShortName': 'ECCO_L4_OCEAN_VEL_LLC0090GRID_DAILY_V4R4', 'temporal': '2000-01-01,2000-01-01'}\n",
269235
"\n",
270-
"Total number of matching granules: 2\n"
271-
]
272-
},
273-
{
274-
"name": "stderr",
275-
"output_type": "stream",
276-
"text": [
277-
"100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00, 3.97s/it]\n"
278-
]
279-
},
280-
{
281-
"name": "stdout",
282-
"output_type": "stream",
283-
"text": [
236+
"Total number of matching granules: 1\n",
237+
"DL Progress: 100%|###########################| 1/1 [00:06<00:00, 6.58s/it]\n",
284238
"\n",
285239
"=====================================\n",
286-
"total downloaded: 61.36 Mb\n",
287-
"avg download speed: 7.72 Mb/s\n",
240+
"total downloaded: 30.68 Mb\n",
241+
"avg download speed: 4.65 Mb/s\n",
242+
"Time spent = 6.5920562744140625 seconds\n",
288243
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_DENS_STRAT_PRESS_LLC0090GRID_DAILY_V4R4\n",
289-
"{'ShortName': 'ECCO_L4_DENS_STRAT_PRESS_LLC0090GRID_DAILY_V4R4', 'temporal': '2000-01-01,2000-01-01'}\n",
290244
"\n",
291-
"Total number of matching granules: 2\n"
292-
]
293-
},
294-
{
295-
"name": "stderr",
296-
"output_type": "stream",
297-
"text": [
298-
"100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00, 3.73s/it]"
299-
]
300-
},
301-
{
302-
"name": "stdout",
303-
"output_type": "stream",
304-
"text": [
245+
"Total number of matching granules: 1\n",
246+
"DL Progress: 100%|###########################| 1/1 [00:04<00:00, 4.97s/it]\n",
305247
"\n",
306248
"=====================================\n",
307-
"total downloaded: 62.39 Mb\n",
308-
"avg download speed: 8.35 Mb/s\n"
309-
]
310-
},
311-
{
312-
"name": "stderr",
313-
"output_type": "stream",
314-
"text": [
315-
"\n"
249+
"total downloaded: 31.2 Mb\n",
250+
"avg download speed: 6.27 Mb/s\n",
251+
"Time spent = 4.976189613342285 seconds\n"
316252
]
317253
}
318254
],
@@ -348,35 +284,14 @@
348284
"output_type": "stream",
349285
"text": [
350286
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\n",
351-
"{'ShortName': 'ECCO_L4_GEOMETRY_LLC0090GRID_V4R4', 'temporal': '2000-01-01,2000-01-01'}\n",
352287
"\n",
353288
"Total number of matching granules: 1\n",
354-
"\n",
355-
"GRID_GEOMETRY_ECCO_V4r4_native_llc0090.nc already exists, and force=False, not re-downloading\n"
356-
]
357-
},
358-
{
359-
"name": "stderr",
360-
"output_type": "stream",
361-
"text": [
362-
"100%|████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]"
363-
]
364-
},
365-
{
366-
"name": "stdout",
367-
"output_type": "stream",
368-
"text": [
289+
"DL Progress: 100%|###########################| 1/1 [00:03<00:00, 3.74s/it]\n",
369290
"\n",
370291
"=====================================\n",
371-
"total downloaded: 0.0 Mb\n",
372-
"avg download speed: 0.0 Mb/s\n"
373-
]
374-
},
375-
{
376-
"name": "stderr",
377-
"output_type": "stream",
378-
"text": [
379-
"\n"
292+
"total downloaded: 8.57 Mb\n",
293+
"avg download speed: 2.29 Mb/s\n",
294+
"Time spent = 3.7480597496032715 seconds\n"
380295
]
381296
}
382297
],

Tutorials_as_Jupyter_Notebooks/ECCO_v4_data_structure_basics.ipynb

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,33 +27,14 @@
2727
"output_type": "stream",
2828
"text": [
2929
"created download directory C:\\Users\\adelman\\Downloads\\ECCO_V4r4_PODAAC\\ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4\n",
30-
"{'ShortName': 'ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4', 'temporal': '2010-01-02,2010-12-31'}\n",
3130
"\n",
32-
"Total number of matching granules: 12\n"
33-
]
34-
},
35-
{
36-
"name": "stderr",
37-
"output_type": "stream",
38-
"text": [
39-
"100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:18<00:00, 1.55s/it]"
40-
]
41-
},
42-
{
43-
"name": "stdout",
44-
"output_type": "stream",
45-
"text": [
31+
"Total number of matching granules: 12\n",
32+
"DL Progress: 100%|#########################| 12/12 [00:15<00:00, 1.29s/it]\n",
4633
"\n",
4734
"=====================================\n",
4835
"total downloaded: 208.75 Mb\n",
49-
"avg download speed: 11.2 Mb/s\n"
50-
]
51-
},
52-
{
53-
"name": "stderr",
54-
"output_type": "stream",
55-
"text": [
56-
"\n"
36+
"avg download speed: 13.48 Mb/s\n",
37+
"Time spent = 15.480218887329102 seconds\n"
5738
]
5839
}
5940
],
@@ -68,7 +49,7 @@
6849
"# download files (granules) containing 2010 monthly mean temperatures\n",
6950
"curr_shortname = \"ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4\"\n",
7051
"ecco_podaac_download(ShortName=curr_shortname,\\\n",
71-
" StartDate=\"2010-01-02\",EndDate=\"2010-12-31\",\\\n",
52+
" StartDate=\"2010-01-01\",EndDate=\"2010-12-31\",\\\n",
7253
" download_root_dir=ECCO_dir,n_workers=6,force_redownload=False)"
7354
]
7455
},
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Downloading_ECCO_Subsets.ipynb

doc/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ The `ecco_v4_py`_ package used in this tutorial was inspired by the `xmitgcm`_ p
2929
fields
3030
Installing_Python_and_Python_Packages
3131
Downloading_ECCO_Datasets_from_PODAAC_Python.ipynb
32+
Downloading_Subsets_of_ECCO_Datasets.ipynb
3233
Tutorial_wget_Command_Line_HTTPS_Downloading_ECCO_Datasets_from_PODAAC
3334
Tutorial_Introduction
3435

0 commit comments

Comments
 (0)