Skip to content

Commit 01adcc4

Browse files
committed
Refactor service lookup and add type hints
1 parent fa7b5f7 commit 01adcc4

1 file changed

Lines changed: 85 additions & 74 deletions

File tree

dataretrieval/samples.py

Lines changed: 85 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
"""Functions for downloading data from the USGS Aquarius Samples database (https://waterqualitydata.us)
1+
"""Functions for downloading data from the USGS Aquarius Samples database
2+
(https://waterdata.usgs.gov/download-samples/#dataProfile=site).
23
34
See https://api.waterdata.usgs.gov/samples-data/docs#/ for API reference
4-
55
"""
66

77
from __future__ import annotations
@@ -17,21 +17,51 @@
1717
from dataretrieval.utils import BaseMetadata, to_str
1818

1919
if TYPE_CHECKING:
20-
from typing import Tuple
20+
from typing import Optional, Tuple, Union
2121

2222
from pandas import DataFrame
2323

2424

25-
BASE_URL = "https://api.waterdata.usgs.gov/samples-data"
25+
_BASE_URL = "https://api.waterdata.usgs.gov/samples-data"
26+
27+
_CODE_SERVICES = Literal[
28+
"characteristicgroup",
29+
"characteristics",
30+
"counties",
31+
"countries",
32+
"observedproperty",
33+
"samplemedia",
34+
"sitetype",
35+
"states",
36+
]
37+
2638

2739
_SERVICES = Literal["activities", "locations", "organizations", "projects", "results"]
2840

29-
_PROFILES = {
30-
"activities": Literal["sampact", "actmetric", "actgroup", "count"],
31-
"locations": Literal["site", "count"],
32-
"organizations": Literal["organization", "count"],
33-
"projects": Literal["project", "projectmonitoringlocationweight"],
34-
"results": Literal[
41+
_PROFILES = Literal[
42+
"actgroup",
43+
"actmetric",
44+
"basicbio",
45+
"basicphyschem",
46+
"count",
47+
"fullbio",
48+
"fullphyschem",
49+
"labsampleprep",
50+
"narrow",
51+
"organization",
52+
"project",
53+
"projectmonitoringlocationweight",
54+
"resultdetectionquantitationlimit",
55+
"sampact",
56+
"site",
57+
]
58+
59+
_PROFILE_LOOKUP = {
60+
"activities": ["sampact", "actmetric", "actgroup", "count"],
61+
"locations": ["site", "count"],
62+
"organizations": ["organization", "count"],
63+
"projects": ["project", "projectmonitoringlocationweight"],
64+
"results": [
3565
"fullphyschem",
3666
"basicphyschem",
3767
"fullbio",
@@ -43,44 +73,7 @@
4373
],
4474
}
4575

46-
_ALL_PROFILES = Literal[*[v for k,v in _PROFILES.items()]]
47-
48-
_CODE_SERVICES = Literal[
49-
"characteristicgroup",
50-
"characteristics",
51-
"counties",
52-
"countries",
53-
"observedproperty",
54-
"samplemedia",
55-
"sitetype",
56-
"states",
57-
]
58-
59-
_SAMPLES_KWARGS = Literal[
60-
"activityMediaName",
61-
"activityStartDateLower",
62-
"activityStartDateUpper",
63-
"activityTypeCode",
64-
"boundingBox",
65-
"characteristic",
66-
"characteristicGroup",
67-
"characteristicUserSupplied",
68-
"countyFips",
69-
"countryFips",
70-
"hydrologicUnit",
71-
"monitoringLocationIdentifier",
72-
"organizationIdentifier",
73-
"pointLocationLatitude",
74-
"pointLocationLongitude",
75-
"pointLocationWithinMiles",
76-
"projectIdentifier",
77-
"recordIdentifierUserSupplied",
78-
"siteTypeCode",
79-
"siteTypeName",
80-
"stateFips",
81-
"usgsPCode",
82-
]
83-
76+
8477
def get_codes(code_service: _CODE_SERVICES) -> DataFrame:
8578
"""Return codes from a Samples code service.
8679
@@ -98,7 +91,7 @@ def get_codes(code_service: _CODE_SERVICES) -> DataFrame:
9891
f"Valid options are: {valid_code_services}."
9992
)
10093

101-
url = f"{BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson"
94+
url = f"{_BASE_URL}/codeservice/{code_service}?mimeType=application%2Fjson"
10295

10396
response = requests.get(url)
10497

@@ -112,13 +105,33 @@ def get_codes(code_service: _CODE_SERVICES) -> DataFrame:
112105
return df
113106

114107
def get_usgs_samples(
115-
ssl_check=True,
116-
service: _SERVICES = "results",
117-
profile= "fullphyschem",
118-
**kwargs,
119-
) -> Tuple[DataFrame, BaseMetadata]:
108+
ssl_check: bool = True,
109+
service: _SERVICES = "results",
110+
profile: _PROFILES = "fullphyschem",
111+
activityMediaName: Optional[Union[str, list[str]]] = None,
112+
activityStartDateLower: Optional[str] = None,
113+
activityStartDateUpper: Optional[str] = None,
114+
activityTypeCode: Optional[Union[str, list[str]]] = None,
115+
characteristicGroup: Optional[Union[str, list[str]]] = None,
116+
characteristic: Optional[Union[str, list[str]]] = None,
117+
characteristicUserSupplied: Optional[Union[str, list[str]]] = None,
118+
boundingBox: Optional[list[float]] = None,
119+
countryFips: Optional[Union[str, list[str]]] = None,
120+
stateFips: Optional[Union[str, list[str]]] = None,
121+
countyFips: Optional[Union[str, list[str]]] = None,
122+
siteTypeCode: Optional[Union[str, list[str]]] = None,
123+
siteTypeName: Optional[Union[str, list[str]]] = None,
124+
usgsPCode: Optional[Union[str, list[str]]] = None,
125+
hydrologicUnit: Optional[Union[str, list[str]]] = None,
126+
monitoringLocationIdentifier: Optional[Union[str, list[str]]] = None,
127+
organizationIdentifier: Optional[Union[str, list[str]]] = None,
128+
pointLocationLatitude: Optional[float] = None,
129+
pointLocationLongitude: Optional[float] = None,
130+
pointLocationWithinMiles: Optional[float] = None,
131+
projectIdentifier: Optional[Union[str, list[str]]] = None,
132+
recordIdentifierUserSupplied: Optional[Union[str, list[str]]] = None,
133+
) -> Tuple[DataFrame, BaseMetadata]:
120134
"""Search Samples database for USGS water quality data.
121-
122135
This is a wrapper function for the Samples database API. All potential
123136
filters are provided as arguments to the function, but please do not
124137
populate all possible filters; leave as many as feasible with their default
@@ -143,10 +156,13 @@ def get_usgs_samples(
143156
profile : string
144157
One of the available profiles associated with a service. Options for each
145158
service are:
146-
results - "fullphyschem", "basicphyschem", "fullbio", "basicbio", "narrow",
147-
"resultdetectionquantitationlimit", "labsampleprep", "count"
159+
results - "fullphyschem", "basicphyschem",
160+
"fullbio", "basicbio", "narrow",
161+
"resultdetectionquantitationlimit",
162+
"labsampleprep", "count"
148163
locations - "site", "count"
149-
activities - "sampact", "actmetric", "actgroup", "count"
164+
activities - "sampact", "actmetric",
165+
"actgroup", "count"
150166
projects - "project", "projectmonitoringlocationweight"
151167
organizations - "organization", "count"
152168
activityMediaName : string or list of strings, optional
@@ -275,27 +291,22 @@ def get_usgs_samples(
275291
... usgsPCode='00400')
276292
277293
"""
294+
278295
_check_profiles(service, profile)
279296

280-
valid_kwargs = get_args(_SAMPLES_KWARGS)
281-
if not all(key in valid_kwargs for key in kwargs):
282-
raise ValueError(
283-
f"Invalid keyword arguments. Valid options are: {valid_kwargs}."
284-
)
297+
params = {
298+
k: v for k, v in locals().items()
299+
if k not in ["ssl_check", "service", "profile"]
300+
and v is not None
301+
}
285302

286-
if len(kwargs) == 0:
287-
raise TypeError(
288-
"No filter parameters provided. You must add at least "
289-
"one filter parameter beyond a service, profile, and format argument."
290-
)
291-
292-
params = {"mimeType": "text/csv"}
293-
params.update(kwargs)
303+
304+
params.update({"mimeType": "text/csv"})
294305

295306
if "boundingBox" in params:
296-
params['boundingBox'] = to_str(params['boundingBox'])
307+
params["boundingBox"] = to_str(params["boundingBox"])
297308

298-
url = f"{BASE_URL}/{service}/{profile}"
309+
url = f"{_BASE_URL}/{service}/{profile}"
299310

300311
req = PreparedRequest()
301312
req.prepare_url(url, params=params)
@@ -331,7 +342,7 @@ def _check_profiles(
331342
f"Valid options are: {valid_services}."
332343
)
333344

334-
valid_profiles = get_args(_PROFILES[service])
345+
valid_profiles = _PROFILE_LOOKUP[service]
335346
if profile not in valid_profiles:
336347
raise ValueError(
337348
f"Invalid profile: '{profile}' for service '{service}'. "

0 commit comments

Comments
 (0)