Skip to content

Commit 1115167

Browse files
thodson-usgsclaude
andauthored
Stabilize NWIS Tests and Improve 5xx Error Handling (#223)
* Deprecate defunct NWIS functions, update tests, and improve 5xx error handling * Refactor NWIS tests to use external JSON mock data * Fix NWIS test regression and resolve WQP DtypeWarning * Update nwis.get_record * Optimize WaterData pagination and centralize parameter handling * Clean up review findings from ci-fix --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5b3766a commit 1115167

10 files changed

Lines changed: 458 additions & 225 deletions

File tree

dataretrieval/nwis.py

Lines changed: 53 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import warnings
1010
from io import StringIO
11+
from json import JSONDecodeError
1112

1213
import pandas as pd
1314
import requests
@@ -44,6 +45,26 @@
4445
_CRS = "EPSG:4269"
4546

4647

48+
def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
49+
"""Parse a JSON NWIS response, raising a helpful error on HTML responses."""
50+
try:
51+
return _read_json(response.json())
52+
except (ValueError, JSONDecodeError) as e:
53+
text_lower = response.text.lower()
54+
content_type = response.headers.get("Content-Type", "").lower()
55+
if (
56+
"<html>" in text_lower
57+
or "<!doctype" in text_lower
58+
or "text/html" in content_type
59+
):
60+
raise ValueError(
61+
f"Received HTML response instead of JSON from {response.url} "
62+
f"(Status: {response.status_code}). This often indicates "
63+
"that the service is currently unavailable."
64+
) from e
65+
raise
66+
67+
4768
def format_response(
4869
df: pd.DataFrame, service: str | None = None, **kwargs
4970
) -> pd.DataFrame:
@@ -481,7 +502,7 @@ def get_dv(
481502
kwargs["multi_index"] = multi_index
482503

483504
response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
484-
df = _read_json(response.json())
505+
df = _parse_json_or_raise(response)
485506

486507
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
487508

@@ -667,7 +688,7 @@ def get_iv(
667688
service="iv", format="json", ssl_check=ssl_check, **kwargs
668689
)
669690

670-
df = _read_json(response.json())
691+
df = _parse_json_or_raise(response)
671692
return format_response(df, **kwargs), NWIS_Metadata(response, **kwargs)
672693

673694

@@ -840,11 +861,11 @@ def get_record(
840861
- 'iv' : instantaneous data
841862
- 'dv' : daily mean data
842863
- 'site' : site description
843-
- 'measurements' : discharge measurements
864+
- 'measurements' : (defunct) use `waterdata.get_field_measurements`
844865
- 'peaks': discharge peaks
845-
- 'gwlevels': groundwater levels
846-
- 'pmcodes': get parameter codes
847-
- 'water_use': get water use data
866+
- 'gwlevels': (defunct) use `waterdata.get_field_measurements`
867+
- 'pmcodes': (defunct) use `get_reference_table`
868+
- 'water_use': (defunct) no replacement available
848869
- 'ratings': get rating table
849870
- 'stat': get statistics
850871
ssl_check: bool, optional
@@ -870,29 +891,10 @@ def get_record(
870891
>>> # Get site description for site 01585200
871892
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="site")
872893
873-
>>> # Get discharge measurements for site 01585200
874-
>>> df = dataretrieval.nwis.get_record(
875-
... sites="01585200", service="measurements"
876-
... )
877894
878895
>>> # Get discharge peaks for site 01585200
879896
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="peaks")
880897
881-
>>> # Get latest groundwater level for site 434400121275801
882-
>>> df = dataretrieval.nwis.get_record(
883-
... sites="434400121275801", service="gwlevels"
884-
... )
885-
886-
>>> # Get information about the discharge parameter code
887-
>>> df = dataretrieval.nwis.get_record(
888-
... service="pmcodes", parameterCd="00060"
889-
... )
890-
891-
>>> # Get water use data for livestock nationally in 2010
892-
>>> df = dataretrieval.nwis.get_record(
893-
... service="water_use", years="2010", categories="L"
894-
... )
895-
896898
>>> # Get rating table for USGS streamgage 01585200
897899
>>> df = dataretrieval.nwis.get_record(sites="01585200", service="ratings")
898900
@@ -907,6 +909,18 @@ def get_record(
907909
"""
908910
_check_sites_value_types(sites)
909911

912+
defunct_replacements = {
913+
"measurements": "`waterdata.get_field_measurements`",
914+
"gwlevels": "`waterdata.get_field_measurements`",
915+
"pmcodes": "`waterdata.get_reference_table`",
916+
"water_use": "no replacement available",
917+
}
918+
if service in defunct_replacements:
919+
raise NameError(
920+
f"The NWIS service '{service}' is no longer supported by "
921+
f"get_record. Use {defunct_replacements[service]} instead."
922+
)
923+
910924
if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
911925
raise TypeError(f"Unrecognized service: {service}")
912926

@@ -936,43 +950,6 @@ def get_record(
936950
df, _ = get_info(sites=sites, ssl_check=ssl_check, **kwargs)
937951
return df
938952

939-
elif service == "measurements":
940-
df, _ = get_discharge_measurements(
941-
site_no=sites, begin_date=start, end_date=end, ssl_check=ssl_check, **kwargs
942-
)
943-
return df
944-
945-
elif service == "peaks":
946-
df, _ = get_discharge_peaks(
947-
site_no=sites,
948-
begin_date=start,
949-
end_date=end,
950-
multi_index=multi_index,
951-
ssl_check=ssl_check,
952-
**kwargs,
953-
)
954-
return df
955-
956-
elif service == "gwlevels":
957-
df, _ = get_gwlevels(
958-
sites=sites,
959-
startDT=start,
960-
endDT=end,
961-
multi_index=multi_index,
962-
datetime_index=datetime_index,
963-
ssl_check=ssl_check,
964-
**kwargs,
965-
)
966-
return df
967-
968-
elif service == "pmcodes":
969-
df, _ = get_pmcodes(ssl_check=ssl_check, **kwargs)
970-
return df
971-
972-
elif service == "water_use":
973-
df, _ = get_water_use(state=state, ssl_check=ssl_check, **kwargs)
974-
return df
975-
976953
elif service == "ratings":
977954
df, _ = get_ratings(site=sites, ssl_check=ssl_check, **kwargs)
978955
return df
@@ -1167,8 +1144,8 @@ class NWIS_Metadata(BaseMetadata):
11671144
Site information if the query included `site_no`, `sites`, `stateCd`,
11681145
`huc`, `countyCd` or `bBox`. `site_no` is preferred over `sites` if
11691146
both are present.
1170-
variable_info: tuple[pd.DataFrame, NWIS_Metadata] | None
1171-
Variable information if the query included `parameterCd`.
1147+
variable_info: None
1148+
Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.
11721149
11731150
"""
11741151

@@ -1232,7 +1209,15 @@ def site_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
12321209
return None # don't set metadata site_info attribute
12331210

12341211
@property
1235-
def variable_info(self) -> tuple[pd.DataFrame, BaseMetadata] | None:
1236-
# define variable_info metadata based on parameterCd if available
1237-
if "parameterCd" in self._parameters:
1238-
return get_pmcodes(parameterCd=self._parameters["parameterCd"])
1212+
def variable_info(self) -> None:
1213+
"""
1214+
Deprecated. Accessing variable_info via NWIS_Metadata is deprecated.
1215+
Returns None.
1216+
"""
1217+
warnings.warn(
1218+
"Accessing variable_info via NWIS_Metadata is deprecated as "
1219+
"it relies on the defunct get_pmcodes function.",
1220+
DeprecationWarning,
1221+
stacklevel=2,
1222+
)
1223+
return None

dataretrieval/utils.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import warnings
6+
from collections.abc import Iterable
67

78
import pandas as pd
89
import requests
@@ -39,14 +40,13 @@ def to_str(listlike, delimiter=","):
3940
'0+10+42'
4041
4142
"""
42-
if isinstance(listlike, list):
43-
return delimiter.join([str(x) for x in listlike])
43+
if isinstance(listlike, str):
44+
return listlike
4445

45-
elif isinstance(listlike, (pd.core.series.Series, pd.core.indexes.base.Index)):
46-
return delimiter.join(listlike.tolist())
46+
if isinstance(listlike, Iterable):
47+
return delimiter.join(map(str, listlike))
4748

48-
elif isinstance(listlike, str):
49-
return listlike
49+
return None
5050

5151

5252
def format_datetime(df, date_field, time_field, tz_field):
@@ -212,6 +212,11 @@ def query(url, payload, delimiter=",", ssl_check=True):
212212
+ f"API response reason: {_reason}. Pseudo-code example of how to "
213213
+ f"split your query: \n {_example}"
214214
)
215+
elif response.status_code in [500, 502, 503]:
216+
raise ValueError(
217+
f"Service Unavailable: {response.status_code} {response.reason}. "
218+
+ f"The service at {response.url} may be down or experiencing issues."
219+
)
215220

216221
if response.text.startswith("No sites/data"):
217222
raise NoSitesError(response.url)

dataretrieval/waterdata/api.py

Lines changed: 14 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
SAMPLES_URL,
2727
_check_profiles,
2828
_default_headers,
29+
_get_args,
2930
get_ogc_data,
3031
get_stats_data,
3132
)
@@ -208,11 +209,7 @@ def get_daily(
208209
output_id = "daily_id"
209210

210211
# Build argument dictionary, omitting None values
211-
args = {
212-
k: v
213-
for k, v in locals().items()
214-
if k not in {"service", "output_id"} and v is not None
215-
}
212+
args = _get_args(locals())
216213

217214
return get_ogc_data(args, output_id, service)
218215

@@ -378,11 +375,7 @@ def get_continuous(
378375
output_id = "continuous_id"
379376

380377
# Build argument dictionary, omitting None values
381-
args = {
382-
k: v
383-
for k, v in locals().items()
384-
if k not in {"service", "output_id"} and v is not None
385-
}
378+
args = _get_args(locals())
386379

387380
return get_ogc_data(args, output_id, service)
388381

@@ -673,11 +666,7 @@ def get_monitoring_locations(
673666
output_id = "monitoring_location_id"
674667

675668
# Build argument dictionary, omitting None values
676-
args = {
677-
k: v
678-
for k, v in locals().items()
679-
if k not in {"service", "output_id"} and v is not None
680-
}
669+
args = _get_args(locals())
681670

682671
return get_ogc_data(args, output_id, service)
683672

@@ -893,11 +882,7 @@ def get_time_series_metadata(
893882
output_id = "time_series_id"
894883

895884
# Build argument dictionary, omitting None values
896-
args = {
897-
k: v
898-
for k, v in locals().items()
899-
if k not in {"service", "output_id"} and v is not None
900-
}
885+
args = _get_args(locals())
901886

902887
return get_ogc_data(args, output_id, service)
903888

@@ -1069,11 +1054,7 @@ def get_latest_continuous(
10691054
output_id = "latest_continuous_id"
10701055

10711056
# Build argument dictionary, omitting None values
1072-
args = {
1073-
k: v
1074-
for k, v in locals().items()
1075-
if k not in {"service", "output_id"} and v is not None
1076-
}
1057+
args = _get_args(locals())
10771058

10781059
return get_ogc_data(args, output_id, service)
10791060

@@ -1247,11 +1228,7 @@ def get_latest_daily(
12471228
output_id = "latest_daily_id"
12481229

12491230
# Build argument dictionary, omitting None values
1250-
args = {
1251-
k: v
1252-
for k, v in locals().items()
1253-
if k not in {"service", "output_id"} and v is not None
1254-
}
1231+
args = _get_args(locals())
12551232

12561233
return get_ogc_data(args, output_id, service)
12571234

@@ -1424,11 +1401,7 @@ def get_field_measurements(
14241401
output_id = "field_measurement_id"
14251402

14261403
# Build argument dictionary, omitting None values
1427-
args = {
1428-
k: v
1429-
for k, v in locals().items()
1430-
if k not in {"service", "output_id"} and v is not None
1431-
}
1404+
args = _get_args(locals())
14321405

14331406
return get_ogc_data(args, output_id, service)
14341407

@@ -1735,11 +1708,8 @@ def get_samples(
17351708

17361709
_check_profiles(service, profile)
17371710

1738-
params = {
1739-
k: v
1740-
for k, v in locals().items()
1741-
if k not in ["ssl_check", "service", "profile"] and v is not None
1742-
}
1711+
# Build argument dictionary, omitting None values
1712+
params = _get_args(locals(), exclude={"ssl_check", "profile"})
17431713

17441714
params.update({"mimeType": "text/csv"})
17451715

@@ -1879,11 +1849,8 @@ def get_stats_por(
18791849
... end_date="01-31",
18801850
... )
18811851
"""
1882-
params = {
1883-
k: v
1884-
for k, v in locals().items()
1885-
if k not in ["expand_percentiles"] and v is not None
1886-
}
1852+
# Build argument dictionary, omitting None values
1853+
params = _get_args(locals(), exclude={"expand_percentiles"})
18871854

18881855
return get_stats_data(
18891856
args=params, service="observationNormals", expand_percentiles=expand_percentiles
@@ -2011,11 +1978,8 @@ def get_stats_date_range(
20111978
... computation_type=["minimum", "maximum"],
20121979
... )
20131980
"""
2014-
params = {
2015-
k: v
2016-
for k, v in locals().items()
2017-
if k not in ["expand_percentiles"] and v is not None
2018-
}
1981+
# Build argument dictionary, omitting None values
1982+
params = _get_args(locals(), exclude={"expand_percentiles"})
20191983

20201984
return get_stats_data(
20211985
args=params,

0 commit comments

Comments
 (0)