start notebook

ehinman · ehinman · commit e16924c7f5fd · 2026-01-02T17:03:58.000-06:00
diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py
@@ -11,7 +11,6 @@
 
 # Public API exports
 from .api import (
-    _check_profiles,
     get_codes,
     get_continuous,
     get_daily,
@@ -41,7 +40,6 @@
     "get_reference_table",
     "get_samples",
     "get_time_series_metadata",
-    "_check_profiles",
     "CODE_SERVICES",
     "SERVICES",
     "PROFILES",
diff --git a/dataretrieval/waterdata/api.py b/dataretrieval/waterdata/api.py
@@ -17,15 +17,15 @@
 from dataretrieval.waterdata.types import (
     CODE_SERVICES,
     METADATA_COLLECTIONS,
-    PROFILE_LOOKUP,
     PROFILES,
     SERVICES,
 )
 from dataretrieval.waterdata.utils import (
     SAMPLES_URL,
     get_ogc_data,
     _construct_api_requests,
-    _walk_pages
+    _walk_pages,
+    _check_profiles
 )
 
 # Set up logger for this module
@@ -1703,31 +1703,3 @@ def get_samples(
 
     return df, BaseMetadata(response)
 
-
-def _check_profiles(
-    service: SERVICES,
-    profile: PROFILES,
-) -> None:
-    """Check whether a service profile is valid.
-
-    Parameters
-    ----------
-    service : string
-        One of the service names from the "services" list.
-    profile : string
-        One of the profile names from "results_profiles",
-        "locations_profiles", "activities_profiles",
-        "projects_profiles" or "organizations_profiles".
-    """
-    valid_services = get_args(SERVICES)
-    if service not in valid_services:
-        raise ValueError(
-            f"Invalid service: '{service}'. Valid options are: {valid_services}."
-        )
-
-    valid_profiles = PROFILE_LOOKUP[service]
-    if profile not in valid_profiles:
-        raise ValueError(
-            f"Invalid profile: '{profile}' for service '{service}'. "
-            f"Valid options are: {valid_profiles}."
-        )
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
@@ -4,7 +4,7 @@
 import os
 import re
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, get_args
 
 import pandas as pd
 import requests
@@ -13,6 +13,12 @@
 from dataretrieval.utils import BaseMetadata
 from dataretrieval import __version__
 
+from dataretrieval.waterdata.types import (
+    PROFILE_LOOKUP,
+    PROFILES,
+    SERVICES,
+)
+
 try:
     import geopandas as gpd
 
@@ -824,3 +830,31 @@ def get_ogc_data(
     return return_list, metadata
 
 
+def _check_profiles(
+    service: SERVICES,
+    profile: PROFILES,
+) -> None:
+    """Check whether a service profile is valid.
+
+    Parameters
+    ----------
+    service : string
+        One of the service names from the "services" list.
+    profile : string
+        One of the profile names from "results_profiles",
+        "locations_profiles", "activities_profiles",
+        "projects_profiles" or "organizations_profiles".
+    """
+    valid_services = get_args(SERVICES)
+    if service not in valid_services:
+        raise ValueError(
+            f"Invalid service: '{service}'. Valid options are: {valid_services}."
+        )
+
+    valid_profiles = PROFILE_LOOKUP[service]
+    if profile not in valid_profiles:
+        raise ValueError(
+            f"Invalid profile: '{profile}' for service '{service}'. "
+            f"Valid options are: {valid_profiles}."
+        )
+
diff --git a/demos/WaterData_demo.ipynb b/demos/WaterData_demo.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7d0ca866",
+   "metadata": {},
+   "source": [
+    "# Using the `waterdata` module to pull data from the USGS Water Data APIs\n",
+    "The `waterdata` module will eventually replace the `nwis` module for accessing USGS water data. It leverages the [Water Data APIs](https://api.waterdata.usgs.gov/) to download metadata, daily values, and instantaneous values. \n",
+    "\n",
+    "While the exact timeline for this transition has not been finalized, we advise switching to the new functions as soon as possible to avoid unexpected interruptions in your workflow.\n",
+    "\n",
+    "As always, please report any issues you encounter on our [Issues](https://github.com/DOI-USGS/dataretrieval-python/issues) page. If you have questions or need help, please reach out to us at comptools@usgs.gov."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fcccb6e8",
+   "metadata": {},
+   "source": [
+    "## Prerequisite: Get your Water Data API key\n",
+    "We highly suggest signing up for your own API key [here](https://api.waterdata.usgs.gov/signup/) to afford yourself higher rate limits and more reliable access to the data. If you opt not to register for an API key, then the number of requests you can make to the Water Data APIs is considerably lower, and if you share an IP address across users or workflows, you may hit those limits even faster. Luckily, registering for an API key is free and easy.\n",
+    "\n",
+    "Once you've copied your API key and saved it in a safe place, you can set it as an environment variable in your Python script for the current session:\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "os.environ['API_USGS_PAT'] = 'your_api_key_here'\n",
+    "``` \n",
+    "Note that the environment variable name is `API_USGS_PAT`, which stands for \"API USGS Personal Access Token\".\n",
+    "\n",
+    "If you'd like a more permanent repository-specific solution, you can use the `python-dotenv` package to read your API key from a `.env` file in your repository root directory, like this:\n",
+    "\n",
+    "```python\n",
+    "!pip install python-dotenv # only run this line once to install the package in your environment\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()  # this will load the environment variables from the .env file\n",
+    "```\n",
+    "Make sure your `.env` file contains the following line:\n",
+    "```\n",
+    "API_USGS_PAT=your_api_key_here\n",
+    "```\n",
+    "Also, do not commit your `.env` file to version control, as it contains sensitive information. You can add it to your `.gitignore` file to prevent accidental commits."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4a2b3f0f",
+   "metadata": {},
+   "source": [
+    "## Lay of the Land\n",
+    "Now that your API key is configured, it's time to take a 10,000-ft view of the functions in the `waterdata` module.\n",
+    "\n",
+    "### Metadata endpoints\n",
+    "These functions retrieve metadata tables that can be used to refine your data requests.\n",
+    "\n",
+    "- `get_reference_table()` - Not sure which parameter code you're looking for, or which hydrologic unit your study area is in? This function will help you find the right input values for the data endpoints to retrieve the information you want.\n",
+    "- `get_codes()` - Similar to `get_reference_table()`, this function retrieves dataframes containing available input values that correspond to the Samples database.\n",
+    "\n",
+    "### Data endpoints\n",
+    "- `get_daily()` - Daily values for monitoring locations, parameter codes, statistical codes, and more.\n",
+    "- `get_continuous()` - Instantaneous values for monitoring locations, parameter codes, statistical codes, and more.\n",
+    "- `get_monitoring_locations()` - Monitoring location information such as name, monitoring location ID, latitude, longitude, HUC code, site types, and more.\n",
+    "- `get_time_series_metadata()` - Time series metadata across monitoring locations, parameter codes, statistical codes, and more. Can be used to answer the question: what types of data are collected at my site(s) of interest and over what time period are/were they collected?\n",
+    "- `get_latest_continuous()` - Latest instantaneous values for requested monitoring locations, parameter codes, statistical codes, and more.\n",
+    "- `get_latest_daily()` - Latest daily values for requested monitoring locations, parameter codes, statistical codes, and more.\n",
+    "- `get_field_measurements()` - Physically measured values (a.k.a. discrete) of gage height, discharge, groundwater levels, and more for requested monitoring locations.\n",
+    "- `get_samples()` - Discrete water quality sample results for monitoring locations, observed properties, and more."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "68591b52",
+   "metadata": {},
+   "source": [
+    "## Examples\n",
+    "Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4ca9bb6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from IPython.display import display\n",
+    "from dataretrieval import waterdata"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1035ebbb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pcodes,metadata = waterdata.get_reference_table(\"parameter-codes\")\n",
+    "display(pcodes.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "176c665b",
+   "metadata": {},
+   "source": [
+    "What is this `metadata` element? Let's take a look:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "30b1b052",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metadata"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e0eab77",
+   "metadata": {},
+   "source": [
+    "Each of these functions returns a tuple containing a dataframe and a metadata element that describes the request made. This `BaseMetadata` object contains the request URL.\n",
+    "\n",
+    "Let's say we want to find all parameter codes relating to streamflow discharge. We can use some string matching to find applicable codes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "665ccb23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "streamflow_pcodes = pcodes[pcodes['parameter_name'].str.contains('streamflow|discharge', case=False, na=False)]\n",
+    "display(streamflow_pcodes[['parameter_code_id', 'parameter_name']])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d9487ee4",
+   "metadata": {},
+   "source": [
+    "Interesting that there are so many different streamflow-related parameter codes! Going on experience, let's use the most common one, `00060`, which is \"Discharge, cubic feet per second\".\n",
+    "\n",
+    "Now that we know which parameter code we want to use, let's find all the stream monitoring locations that have recent discharge data and at least 10 years of daily values in the state of Nebraska. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce4df5fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "NE_locations,_ = waterdata.get_monitoring_locations(state_name=\"Nebraska\", site_type_code=\"ST\")\n",
+    "display(NE_locations.head())"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "dr-test",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}