Skip to content

Commit 05f254c

Browse files
authored
Merge pull request #83 from andrewdelman/docker
Docker setup
2 parents 5f1b95b + 792e666 commit 05f254c

8 files changed

Lines changed: 693 additions & 0 deletions

Docker/Dockerfile

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
# syntax=docker/dockerfile:1
# Dockerfile to build environment and run ECCOv4 Python tutorials.
# Adapted from pangeo/base-image.
FROM ubuntu:22.04

# Host user identity, passed in by docker_image_build.sh so container files
# are owned by the invoking user (no defaults on purpose: the build script
# always supplies them).
ARG NB_USER
ARG NB_UID

# Setup environment to match variables set by repo2docker as much as possible
# The name of the conda environment into which the requested packages are installed
ENV CONDA_ENV=jupyter \
    # Use /bin/bash as shell, not the default /bin/sh (arrow keys, etc don't work then)
    SHELL=/bin/bash \
    # Setup locale to be UTF-8, avoiding gnarly hard to debug encoding errors
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    # Install conda in the same place repo2docker does
    CONDA_DIR=/srv/conda

# All env vars that reference other env vars need to be in their own ENV block
# Path to the python environment where the jupyter notebook packages are installed
ENV NB_PYTHON_PREFIX=${CONDA_DIR}/envs/${CONDA_ENV} \
    # Home directory of our non-root user
    HOME=/home/${NB_USER}

# Add both our notebook env as well as default conda installation to $PATH
# Thus, when we start a `python` process (for kernels, or notebooks, etc),
# it loads the python in the notebook conda environment, as that comes
# first here.
ENV PATH=${NB_PYTHON_PREFIX}/bin:${CONDA_DIR}/bin:${PATH}

# Ask dask to read config from ${CONDA_DIR}/etc rather than
# the default of /etc, since the non-root user can write
# to ${CONDA_DIR}/etc but not to /etc
ENV DASK_ROOT_CONFIG=${CONDA_DIR}/etc

RUN echo "Creating ${NB_USER} user..." \
    # Create a group for the user to be part of, with gid same as uid
    && groupadd --gid ${NB_UID} ${NB_USER} \
    # Create non-root user, with given gid, uid and create $HOME
    && useradd --create-home --gid ${NB_UID} --no-log-init --uid ${NB_UID} ${NB_USER}

# Make sure that /srv is owned by non-root user, so we can install things there
USER root
RUN chown -R ${NB_USER}:${NB_USER} /srv

# Run conda activate each time a bash shell starts, so users don't have to manually type conda activate
# Note this is only read by shell, but not by the jupyter notebook - that relies
# on us starting the correct `python` process, which we do by adding the notebook conda environment's
# bin to PATH earlier ($NB_PYTHON_PREFIX/bin)
RUN echo ". ${CONDA_DIR}/etc/profile.d/conda.sh ; conda activate ${CONDA_ENV}" > /etc/profile.d/init_conda.sh

# Install basic apt packages.
# DEBIAN_FRONTEND is set inline (not via ENV) so the non-interactive setting
# does not leak into the runtime environment; --no-install-recommends keeps
# the layer small.
RUN echo "Installing Apt-get packages..." \
    && apt-get update --fix-missing > /dev/null \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        apt-utils \
        tmux \
        tzdata \
        wget \
        zip \
        > /dev/null \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Add TZ configuration - https://github.com/PrefectHQ/prefect/issues/3061
ENV TZ=UTC
# ========================

USER ${NB_USER}
WORKDIR ${HOME}

# Install latest Miniforge in ${CONDA_DIR}
RUN echo "Installing Miniforge..." \
    && URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh" \
    && wget --quiet ${URL} -O installer.sh \
    && /bin/bash installer.sh -u -b -p ${CONDA_DIR} \
    && rm installer.sh \
    && mamba clean -afy \
    # After installing the packages, we cleanup some unnecessary files
    # to try reduce image size - see https://jcristharif.com/conda-docker-tips.html
    # Although we explicitly do *not* delete .pyc files, as that seems to slow down startup
    # quite a bit unfortunately - see https://github.com/2i2c-org/infrastructure/issues/2047
    && find ${CONDA_DIR} -follow -type f -name '*.a' -delete

COPY --chown=${NB_USER}:${NB_USER} ./ECCO-v4-Python-Tutorial ${HOME}/ECCO-v4-Python-Tutorial

# BUG FIX: was `echo "..." & \` which backgrounded the echo instead of
# chaining the commands; `&&` sequences them and propagates failures.
RUN echo "Using environment.yml to create conda environment ${CONDA_ENV}" \
    && mamba env create --name ${CONDA_ENV} \
        -f ${HOME}/ECCO-v4-Python-Tutorial/Docker/environment.yml

# create symlink from home directory to jupyter_lab_start_docker.sh
RUN ln -s ${HOME}/ECCO-v4-Python-Tutorial/Docker/jupyter_lab_start_docker.sh \
    ${HOME}/jupyter_lab_start_docker.sh

# Documentation only: the container port the README tells users to publish.
EXPOSE 8888

# Start jupyter lab inside the container.
# BUG FIX: exec-form ENTRYPOINT performs NO variable expansion, so the
# original ["${HOME}/jupyter_lab_start_docker.sh"] tried to exec the literal
# string "${HOME}/..." and failed at container start. Use a path relative to
# WORKDIR (${HOME}), which exec form resolves correctly.
ENTRYPOINT ["./jupyter_lab_start_docker.sh"]

Docker/README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Run tutorials on an AWS EC2 instance using a Docker container

(valid as of 2024-08-26)

## Getting started

Before setting up the Docker container, you will need to start an EC2 instance. Please follow the [AWS Cloud: getting started](https://ecco-v4-python-tutorial.readthedocs.io/AWS_Cloud_getting_started.html) tutorial up to the part in Step 3 where the tutorial repository is cloned using `git clone`. However, do not run `jupyter_env_setup.sh`. Instead, run `sudo dnf install docker` to install the Docker software on your instance.

## Build the Docker image

The `ECCO-v4-Python-Tutorial/Docker` directory has the files that you need to build a Docker image and then run it and use the tutorials. In that directory, run `./docker_image_build.sh` and it will build a Docker image named `localhost/ecco_tut_image:latest`.

The build process takes a few minutes, and the image will occupy 4-5 GB of storage, so make sure your instance has sufficient storage. You will also likely need about that much memory to complete the build process, so at least a `large` instance on AWS is strongly recommended.

## Run the Docker image

When the build completes, you will run the image, which will activate a container within your EC2 instance and start running Jupyter lab in that container. This is done with the following command:

```bash
docker run -it -p 8888:8888 localhost/ecco_tut_image:latest
```

Note the port numbers specified under the `-p` option. The port listed after the colon is the container port, which is always 8888 unless this is changed manually in the `Dockerfile` (on the `EXPOSE 8888` line) prior to building the image. The port before the colon is what the host EC2 instance uses to communicate with the container, and this can be specified differently depending on the user's port availability.

When the command above is run, you will first be queried for NASA Earthdata credentials if those are not already stored in a `~/.netrc` file under your user home directory. After entering the credentials, you will be queried for the container port number (8888 by default unless changed as described above), and an optional password (if no password is entered, none will be needed to log in to Jupyter lab).

As Jupyter lab is launched, you will see a lot of output tagged `ServerApp` or `LabApp`. To free up this window you can press `Ctrl-p` `Ctrl-q`, and the window will detach from the container...but importantly, the container is still running. To check the status of Docker containers, run `docker ps -a`.

## Open Jupyter lab in your browser

Now you need to open a connection between your local machine and the EC2 instance with the correct port forwarding. On your local machine you can use any unused port; note that if you are already running Jupyter lab/notebooks locally, port 8888 will likely already be in use. This example uses 9889 as the local port:

```bash
ssh -i ~/.ssh/aws_ec2.pem -L 9889:localhost:8888 ec2-user@100.104.70.127
```

and in a browser window on your local machine, access the port you specified before `localhost` above:

```
http://localhost:9889
```

You will see a screen that asks for a password, but if you didn't enter one before, you can just go ahead and click `Login`. Now you have access to the tutorial repository, and the tutorials are in the directory `Tutorials_as_Jupyter_Notebooks`.

## Re-connect to Jupyter lab in Docker container

If the Docker container is stopped or exited, the Jupyter lab session will also exit. To restart a Docker container, use `docker ps -a` to find the container name and ID, and then use

```bash
docker start <container-name or id>
```

to re-start the container. Then you may need to run the following on your instance to re-start Jupyter lab within the container:

```bash
docker exec -it <container-name or id> ~/jupyter_lab_start_docker.sh
```

and you should see the `ServerApp` and `LabApp` output appear, indicating that the session has started. Then you can use `Ctrl-p` `Ctrl-q` to detach that window from the container.

Docker/docker_image_build.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Build the tutorial Docker image.
#
# Uses the parent user directory as the build context and the Dockerfile in
# the ECCO-v4-Python-Tutorial/Docker subdirectory, passing the current user's
# name and uid as build arguments (consumed by the Dockerfile's NB_USER /
# NB_UID ARGs).

# Exit on any error and treat unset variables as errors. In particular,
# the original unchecked `cd` could silently leave the build running from
# the wrong context directory if /home/${USER} did not exist.
set -euo pipefail

cd "/home/${USER}"

docker build . \
    --build-arg NB_USER="${USER}" \
    --build-arg NB_UID="$(id -u "${USER}")" \
    -t ecco_tut_image \
    -f ./ECCO-v4-Python-Tutorial/Docker/Dockerfile

Docker/earthdata_auth_docker.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Shell script for adding NASA Earthdata authentication credentials to ~/.netrc
# if they are not already in the file.

# ANSI color escape sequences. Note: bash's builtin `echo` does NOT interpret
# \033 or \n, so all colored/multi-line output below uses `printf` instead
# (the original `echo` calls printed the escape codes literally).
red_start='\033[0;31m'
nocolor_start='\033[0m'

# Set up NASA Earthdata credential
printf "${red_start}Setting up NASA Earthdata authentication${nocolor_start}\n"

# Check if credentials are already archived in ~/.netrc, and if not then
# prompt the user for them.
earthdata_cred_stored=0
if [ -f ~/.netrc ]; then
    if grep -q "machine urs.earthdata.nasa.gov" ~/.netrc; then
        earthdata_cred_stored=1
        printf "${red_start}Earthdata credentials already archived ${nocolor_start}\n"
    fi
fi
if [ $earthdata_cred_stored -eq 0 ]; then
    # A previous run may have left the file read-only (chmod 400 below);
    # make it writable before appending.
    if [ -f ~/.netrc ]; then chmod 600 ~/.netrc; fi
    read -p 'NASA Earthdata username: ' uservar
    # -s keeps the password from being echoed to the terminal.
    read -s -p 'NASA Earthdata password: ' passvar
    echo
    # BUG FIX: the original used plain `echo "...\n..."`, which in bash wrote
    # the two-character sequence \n literally, producing an invalid netrc
    # file. printf writes real newlines.
    printf 'machine urs.earthdata.nasa.gov\n login %s\n password %s\n' \
        "${uservar}" "${passvar}" >> ~/.netrc

    printf "\n${red_start}NASA Earthdata authentication info archived in ~/.netrc${nocolor_start}\n"
fi
# Lock the credentials file down to owner read-only.
chmod 400 ~/.netrc

0 commit comments

Comments
 (0)