Commit bb38954 (merge of 2 parents: 2fafadd + 6617999)

Merge pull request #78 from andrewdelman/cloud_compatibility

In-cloud access tutorial updates

28 files changed: 13,473 additions & 5,908 deletions

Cloud_Setup/jupyter_env_setup.sh

Lines changed: 7 additions & 12 deletions

@@ -29,29 +29,22 @@ echo -e "${red_start}Installed wget${nocolor_start}"
 sudo dnf install tmux -y
 echo -e "${red_start}Installed tmux${nocolor_start}"
 
-# retrieve and install miniforge in /tmp/
-# assuming EBS volume is already attached to instance
+# retrieve and install miniforge
 echo -e "${red_start}Starting Miniforge3 installation${nocolor_start}"
 mkdir -p /tmp
 wget "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" -O /tmp/Miniforge3.sh
-bash /tmp/Miniforge3.sh -b -p /tmp/conda
+bash /tmp/Miniforge3.sh -b -p ~/conda
 rm -f /tmp/Miniforge.sh
-source "/tmp/conda/etc/profile.d/conda.sh"
-source "/tmp/conda/etc/profile.d/mamba.sh"
+source ~/conda/bin/activate
 
 echo -e "${red_start}Completed Miniforge3 installation${nocolor_start}"
 
 # add conda and mamba to path
 mamba init
 
-# set paths to environment and package directories
-printf '\n# set conda environment and package directories' >> ~/.bashrc
-printf '\nexport CONDA_ENVS_PATH=/tmp/conda/envs' >> ~/.bashrc
-printf '\nexport CONDA_PKGS_DIRS=/tmp/conda/pkgs' >> ~/.bashrc
-source ~/.bashrc
+# # set paths to environment and package directories
 
-# create jupyter environment under /tmp/conda/envs/
-# (in EBS storage to save space in home directory)
+# create jupyter environment
 mamba create --name jupyter python=3.11 -y
 echo -e "${red_start}Created jupyter environment${nocolor_start}"

@@ -94,6 +87,7 @@ mamba install notebook -y
 mamba install progressbar -y
 mamba install gsw -y
 mamba install nco -y
+mamba install pympler -y
 
 # install remaining packages using pip
 # (mamba installs tend to get killed on t2.micro)

@@ -106,6 +100,7 @@ pip install ecco_v4_py
 
 echo -e "${red_start}Completed Python package installations${nocolor_start}"
 
+
 echo -e "${red_start}Setting up NASA Earthdata authentication${nocolor_start}"
 # NASA Earthdata authentication
 # check if credentials are already archived in ~/.netrc, and if not then prompt the user for them
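The setup script installs a stack of conda and pip packages (notebook, progressbar, gsw, nco, and now pympler, among others). A small sanity check, not part of the commit, can confirm afterwards that the importable ones actually resolved in the `jupyter` environment; the helper below and the package list are illustrative only:

```python
import importlib.util

def missing_packages(packages):
    """Return the subset of package names that cannot be imported on this system."""
    return [p for p in packages if importlib.util.find_spec(p) is None]

# e.g. after running the setup script, inside the jupyter env:
#   missing_packages(['notebook', 'progressbar', 'gsw', 'pympler'])
# stdlib names resolve everywhere, so this prints []
print(missing_packages(['json', 'os']))
```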

Cloud_Setup/jupyter_lab_start.sh

Lines changed: 2 additions & 2 deletions

@@ -7,7 +7,7 @@ red_start='\033[0;31m'
 blue_start='\033[0;34m'
 nocolor_start='\033[0m'
 
-source /tmp/conda/bin/activate
+source ~/conda/bin/activate
 conda activate jupyter
 
 # Start configuration for Jupyter lab

@@ -20,7 +20,7 @@ jlab_start="jupyter Space lab Space --no-browser Space --autoreload Space --port
 tmux new -d -s jupyterlab
 
 # Execute commands in tmux window using send-keys
-tmux send-keys -t jupyterlab source Space /tmp/conda/bin/activate Enter
+tmux send-keys -t jupyterlab source Space ~/conda/bin/activate Enter
 tmux send-keys -t jupyterlab conda Space activate Space jupyter Enter
 tmux send-keys -t jupyterlab ${jlab_start} Enter

ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Downloading_ECCO_Subsets.ipynb

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
 "\n",
 "\\- Time subsetting in non-continuous ranges (e.g., downloading boreal summer files from multiple years)\n",
 "\n",
-"> Currently the `ecco_download` module is a [standalone download](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/ecco_download.py). However, we hope to include it in the `ecco_v4_py` package soon so that it does not need to be downloaded or imported into your workspace separately. Stay tuned!\n",
+"> Currently the `ecco_download` module is a [standalone download](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/ecco_download.py). However, we hope to include it in the `ecco_v4_py` package soon so that it does not need to be downloaded or imported into your workspace separately. Stay tuned!\n",
 "\n",
 "## Getting Started\n",
 "\n",

ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Jupyter_Notebook_Downloading_ECCO_Datasets_from_PODAAC.ipynb

Lines changed: 3 additions & 3 deletions

@@ -7,15 +7,15 @@
 "source": [
 "# Using Python to Download ECCO Datasets\n",
 "\n",
-"**Note: This notebook was modified by Andrew Delman (updated 2023-12-22) from the tutorial on the** [ECCO-GROUP Github](https://github.com/ECCO-GROUP/ECCO-ACCESS/blob/master/PODAAC/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Jupyter_Notebook_Downloading_ECCO_Datasets_from_PODAAC.ipynb) **by Jack McNelis and Ian Fenty, Version 1.1 dated 2021-06-25.**\n",
+"**Note: This notebook was modified by Andrew Delman (updated 2024-04-04) from the tutorial on the** [ECCO-GROUP Github](https://github.com/ECCO-GROUP/ECCO-ACCESS/blob/master/PODAAC/Downloading_ECCO_datasets_from_PODAAC/Tutorial_Python3_Jupyter_Notebook_Downloading_ECCO_Datasets_from_PODAAC.ipynb) **by Jack McNelis and Ian Fenty, Version 1.1 dated 2021-06-25.**\n",
 "\n",
 "This Jupyter notebook provides instructions and Python code for downloading a set of granules (files) for an ECCO \"Dataset\" hosted by PO.DAAC. The focus is on downloading datasets in the lat-lon-cap 90 (llc90) native grid of the ECCO v4 simulations, since the tutorials mostly use output on the native grid. If you're new to this grid geometry, don't worry! The ecco_v4_py package discussed in the previous tutorial will help you load the ECCO output, make computations, and plot the results while hardly needing to interact with the model grid.\n",
 "\n",
 "The example ECCO Dataset used in this tutorial is \"ECCO Sea Surface Height - Daily Mean llc90 Grid (Version 4 Release 4)\" which provides daily mean sea surface height on the native llc90 grid ([10.5067/ECL5D-SSH44](https://doi.org/10.5067/ECL5D-SSH44)).\n",
 "\n",
 "These data can also be accessed directly through [NASA Earthdata search](https://search.earthdata.nasa.gov/search?fpj=ECCO). You will need to set up a NASA Earthdata account if you do not have one already. There is [a nice graphical interface](https://www.ecco-group.org/datasets.htm) to sort through the ECCO datasets available from PO.DAAC.\n",
 "\n",
-"> Tip: if you are already familiar with Python and ECCO output, and have edited your `netrc` file as described [below](#Earthdata-Login-Requirements), you can download the [ECCO_download](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/ecco_download.py) module. Then import it to your code using `from ecco_download import *` and call the function `ecco_podaac_download` to start downloading. You will need to know the ShortName of the dataset you want, which you can look up using the variable lists [here](https://github.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/tree/master/varlist). To see the syntax of the `ecco_podaac_download` function use `help(ecco_podaac_download)`, or see the end of this tutorial for an example.\n",
+"> Tip: if you are already familiar with Python and ECCO output, and have edited your `netrc` file as described [below](#Earthdata-Login-Requirements), you can download the [ECCO_download](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/ecco_download.py) module. Then import it to your code using `from ecco_download import *` and call the function `ecco_podaac_download` to start downloading. You will need to know the ShortName of the dataset you want, which you can look up using the variable lists [here](https://github.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/tree/master/varlist). To see the syntax of the `ecco_podaac_download` function use `help(ecco_podaac_download)`, or see the end of this tutorial for an example.\n",
 "\n",
 "\n",
 "## Getting Started\n",

@@ -938,7 +938,7 @@
 "\n",
 "If you've made it this far, that means you can now download and plot any available ECCOv4r4 variable on your local machine. Woohoo! But to make it easier in the future, you can also download the following Python module that runs the downloading routines contained in this notebook.\n",
 "\n",
-"[ecco_download module](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/Downloading_ECCO_datasets_from_PODAAC/ecco_download.py)\n",
+"[ecco_download module](https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ECCO-ACCESS/ecco_download.py)\n",
 "\n",
 "You can save this file either in the same directory where you store the tutorial notebooks, or a different directory that you then add to your path using sys.path.append. Then you can download using the `ecco_podaac_download` function. To see the syntax of how this is used, let's invoke the module to download daily SSH data for the week 2000-01-08 to 2000-01-14:"
 ]

ECCO-ACCESS/ecco_s3_retrieve.py

Lines changed: 40 additions & 19 deletions

@@ -86,19 +86,37 @@ def get_results(params: dict, headers: dict=None):
                             headers=headers).json()
     return response
 
-def get_granules(params: dict):
-    response = get_results(params=params)
-    if 'feed' in response.keys():
-        s3_files_list = []
-        for curr_entry in response['feed']['entry']:
-            for curr_link in curr_entry['links']:
-                if "direct download access via S3" in curr_link['title']:
-                    s3_files_list.append(curr_link['href'])
-                    break
-    elif 'errors' in response.keys():
-        raise Exception(response['errors'][0])
+def get_granules(params: dict, ShortName: str, SingleDay_flag: bool):
+    time_start = np.array([]).astype('datetime64[ns]')
+    s3_files_list = []
+    completed_query = False
+    while completed_query == False:
+        response = get_results(params=params)
+        if 'feed' in response.keys():
+            for curr_entry in response['feed']['entry']:
+                time_start = np.append(time_start,np.datetime64(curr_entry['time_start'],'ns'))
+                for curr_link in curr_entry['links']:
+                    if "direct download access via S3" in curr_link['title']:
+                        s3_files_list.append(curr_link['href'])
+                        break
+        elif 'errors' in response.keys():
+            raise Exception(response['errors'][0])
+
+        if len(response['feed']['entry']) < 2000:
+            completed_query = True
+        else:
+            # do another CMR search since previous search hit the allowed maximum
+            # number of entries (2000)
+            params['temporal'] = str(np.datetime64(response['feed']['entry'][-1]['time_end'],'D')\
+                                     + np.timedelta64(1,'D'))+params['temporal'][10:]
+
+    # reduce granule list to single day if only one day in requested range
+    if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
+        if ((SingleDay_flag == True) and (len(s3_files_list) > 1)):
+            day_index = np.argmin(np.abs(time_start - np.datetime64(StartDate,'D')))
+            s3_files_list = s3_files_list[day_index:(day_index+1)]
 
-    return s3_files_list
+    return s3_files_list
 
 
 # # Adjust StartDate and EndDate to CMR query values
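The new loop in `get_granules` works around CMR's cap on entries returned per query: whenever a full page (2000 entries) comes back, the `temporal` parameter is advanced past the last returned granule and the search repeats. That pattern can be sketched in miniature; the in-memory stub (`fake_get_results`, `ALL_ENTRIES`) and a cap of 3 standing in for 2000 are invented for illustration:

```python
import numpy as np

PAGE_LIMIT = 3  # stand-in for CMR's 2000-entry-per-query cap

# hypothetical catalog: seven daily granules with ISO start dates and S3 hrefs
ALL_ENTRIES = [{'time_start': str(np.datetime64('2000-01-01', 'D') + np.timedelta64(i, 'D')),
                'href': 's3://bucket/file_%d.nc' % i} for i in range(7)]

def fake_get_results(start_date):
    # return at most PAGE_LIMIT entries on/after start_date (ISO strings sort correctly)
    matching = [e for e in ALL_ENTRIES if e['time_start'] >= start_date]
    return matching[:PAGE_LIMIT]

def collect_granules(start_date):
    s3_files_list = []
    completed_query = False
    while not completed_query:
        entries = fake_get_results(start_date)
        s3_files_list += [e['href'] for e in entries]
        if len(entries) < PAGE_LIMIT:
            # a short page means we have everything
            completed_query = True
        else:
            # page was full: advance the temporal cursor past the last entry and re-query
            start_date = str(np.datetime64(entries[-1]['time_start'], 'D') + np.timedelta64(1, 'D'))
    return s3_files_list

print(len(collect_granules('2000-01-01')))  # → 7, gathered across three pages
```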
@@ -130,12 +148,12 @@ def get_granules(params: dict)
                 +'Program will exit now !\n')
 
 
-    # for monthly and daily datasets, do not include the month or day before
+    SingleDay_flag = False
     if (('MONTHLY' in ShortName) or ('DAILY' in ShortName)):
         if np.datetime64(EndDate,'D') - np.datetime64(StartDate,'D') \
             > np.timedelta64(1,'D'):
+            # for monthly and daily datasets, do not include the month or day before
             StartDate = str(np.datetime64(StartDate,'D') + np.timedelta64(1,'D'))
-            SingleDay_flag = False
         else:
             # for single day ranges we need to make the adjustment
             # after the CMR request
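When `SingleDay_flag` survives to the post-query step, `get_granules` keeps only the granule nearest the requested day, using `np.argmin` over the collected start times. The selection step in isolation, with made-up granule dates:

```python
import numpy as np

# hypothetical granule start times (as collected from CMR) and a requested single day
time_start = np.array(['1999-12-28', '2000-01-04', '2000-01-11'], dtype='datetime64[ns]')
StartDate = '2000-01-05'

# index of the granule whose start time is closest to the requested day
day_index = int(np.argmin(np.abs(time_start - np.datetime64(StartDate, 'D'))))
print(day_index)  # → 1 (2000-01-04 is 1 day away; the others are 8 and 6 days away)
```

Slicing the granule list as `s3_files_list[day_index:(day_index+1)]`, as the diff does, then yields a one-element list rather than a bare string.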
@@ -162,8 +180,9 @@ def get_granules(params: dict)
     ### Query CMR for the desired ECCO Dataset
 
     # grans means 'granules', PO.DAAC's term for individual files in a dataset
-    s3_files_list = get_granules(input_search_params)
-
+    s3_files_list = get_granules(input_search_params,ShortName,SingleDay_flag)
+
+
     return s3_files_list
@@ -499,8 +518,9 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,max_avail_frac=0.5
 
         pass
 
-    import shutil
-
+    import shutil
+
+
     # force max_avail_frac to be within limits [0,0.9]
     max_avail_frac = np.fmin(np.fmax(max_avail_frac,0),0.9)
@@ -529,9 +549,10 @@
 
     # for snapshot datasets with monthly snapshot_interval, only include snapshots at beginning/end of months
     if (('SNAPSHOT' in curr_shortname) and (snapshot_interval == 'monthly')):
+        import re
         s3_files_list_copy = list(tuple(s3_files_list))
         for s3_file in s3_files_list:
-            snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",url)[0][1:]
+            snapshot_date = re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}",s3_file)[0][1:]
             if snapshot_date[8:] != '01':
                 s3_files_list_copy.remove(s3_file)
         s3_files_list = s3_files_list_copy
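The last hunk fixes a stale variable (`url` → `s3_file`) in the monthly-snapshot filter, which keeps only granules dated the first of a month. The filter logic in isolation, with hypothetical S3 keys:

```python
import re

# hypothetical S3 keys for snapshot granules; only the _YYYY-MM-DD date matters
s3_files = [
    's3://bucket/ECCO_SNAP/SNAP_2000-01-01.nc',
    's3://bucket/ECCO_SNAP/SNAP_2000-01-15.nc',
    's3://bucket/ECCO_SNAP/SNAP_2000-02-01.nc',
]

# extract "_YYYY-MM-DD", drop the leading underscore, and keep day == '01'
monthly = [f for f in s3_files
           if re.findall("_[0-9]{4}-[0-9]{2}-[0-9]{2}", f)[0][1:][8:] == '01']
print(monthly)  # → the 2000-01-01 and 2000-02-01 keys; the mid-month snapshot is dropped
```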
