Skip to content

Commit bd3f6ad

Browse files
committed
Remove max_results, as it is confusing and should be better vetted and documented before being added
1 parent 8605dea commit bd3f6ad

2 files changed

Lines changed: 26 additions & 75 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def get_daily(
4242
time: Optional[Union[str, List[str]]] = None,
4343
bbox: Optional[List[float]] = None,
4444
limit: Optional[int] = None,
45-
max_results: Optional[int] = None,
4645
convert_type: bool = True,
4746
) -> Tuple[pd.DataFrame, BaseMetadata]:
4847
"""Daily data provide one data value to represent water conditions for the
@@ -166,9 +165,6 @@ def get_daily(
166165
allowable limit is 10000. It may be beneficial to set this number lower
167166
if your internet connection is spotty. The default (None) will set the
168167
limit to the maximum allowable limit for the service.
169-
max_results : numeric, optional
170-
The optional maximum number of rows to return. This value must be less
171-
than the requested limit.
172168
convert_type : boolean, optional
173169
If True, the function will convert the data to dates and qualifier to
174170
string vector
@@ -258,7 +254,6 @@ def get_monitoring_locations(
258254
time: Optional[Union[str, List[str]]] = None,
259255
bbox: Optional[List[float]] = None,
260256
limit: Optional[int] = None,
261-
max_results: Optional[int] = None,
262257
convert_type: bool = True,
263258
) -> Tuple[pd.DataFrame, BaseMetadata]:
264259
"""Location information is basic information about the monitoring location
@@ -477,9 +472,6 @@ def get_monitoring_locations(
477472
allowable limit is 10000. It may be beneficial to set this number lower
478473
if your internet connection is spotty. The default (None) will set the
479474
limit to the maximum allowable limit for the service.
480-
max_results : numeric, optional
481-
The optional maximum number of rows to return. This value must be less
482-
than the requested limit.
483475
skip_geometry : boolean, optional
484476
This option can be used to skip response geometries for each feature.
485477
The returning object will be a data frame with no spatial information.
@@ -545,7 +537,6 @@ def get_time_series_metadata(
545537
time: Optional[Union[str, List[str]]] = None,
546538
bbox: Optional[List[float]] = None,
547539
limit: Optional[int] = None,
548-
max_results: Optional[int] = None,
549540
convert_type: bool = True,
550541
) -> Tuple[pd.DataFrame, BaseMetadata]:
551542
"""Daily data and continuous measurements are grouped into time series,
@@ -672,9 +663,6 @@ def get_time_series_metadata(
672663
allowable limit is 10000. It may be beneficial to set this number lower
673664
if your internet connection is spotty. The default (None) will set the
674665
limit to the maximum allowable limit for the service.
675-
max_results : numeric, optional
676-
The optional maximum number of rows to return. This value must be less
677-
than the requested limit.
678666
convert_type : boolean, optional
679667
If True, the function will convert the data to dates and qualifier to
680668
string vector
@@ -733,7 +721,6 @@ def get_latest_continuous(
733721
time: Optional[Union[str, List[str]]] = None,
734722
bbox: Optional[List[float]] = None,
735723
limit: Optional[int] = None,
736-
max_results: Optional[int] = None,
737724
convert_type: bool = True,
738725
) -> Tuple[pd.DataFrame, BaseMetadata]:
739726
"""This endpoint provides the most recent observation for each time series
@@ -854,9 +841,6 @@ def get_latest_continuous(
854841
allowable limit is 10000. It may be beneficial to set this number lower
855842
if your internet connection is spotty. The default (None) will set the
856843
limit to the maximum allowable limit for the service.
857-
max_results : numeric, optional
858-
The optional maximum number of rows to return. This value must be less
859-
than the requested limit.
860844
convert_type : boolean, optional
861845
If True, the function will convert the data to dates and qualifier to
862846
string vector
@@ -915,7 +899,6 @@ def get_field_measurements(
915899
time: Optional[Union[str, List[str]]] = None,
916900
bbox: Optional[List[float]] = None,
917901
limit: Optional[int] = None,
918-
max_results: Optional[int] = None,
919902
convert_type: bool = True,
920903
) -> Tuple[pd.DataFrame, BaseMetadata]:
921904
"""Field measurements are physically measured values collected during a
@@ -1026,9 +1009,6 @@ def get_field_measurements(
10261009
allowable limit is 10000. It may be beneficial to set this number lower
10271010
if your internet connection is spotty. The default (None) will set the
10281011
limit to the maximum allowable limit for the service.
1029-
max_results : numeric, optional
1030-
The optional maximum number of rows to return. This value must be less
1031-
than the requested limit.
10321012
convert_type : boolean, optional
10331013
If True, the function will convert the data to dates and qualifier to
10341014
string vector

dataretrieval/waterdata/utils.py

Lines changed: 26 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ def _construct_api_requests(
320320
properties: Optional[List[str]] = None,
321321
bbox: Optional[List[float]] = None,
322322
limit: Optional[int] = None,
323-
max_results: Optional[int] = None,
324323
skip_geometry: bool = False,
325324
**kwargs,
326325
):
@@ -341,8 +340,6 @@ def _construct_api_requests(
341340
Bounding box coordinates as a list of floats.
342341
limit : Optional[int], optional
343342
Maximum number of results to return per request.
344-
max_results : Optional[int], optional
345-
Maximum number of rows to return.
346343
skip_geometry : bool, optional
347344
Whether to exclude geometry from the response (default is False).
348345
**kwargs
@@ -354,11 +351,6 @@ def _construct_api_requests(
354351
requests.PreparedRequest
355352
The constructed HTTP request object ready to be sent.
356353
357-
Raises
358-
------
359-
ValueError
360-
If `limit` is greater than `max_results`.
361-
362354
Notes
363355
-----
364356
- Date/time parameters are automatically formatted to ISO8601.
@@ -367,6 +359,7 @@ def _construct_api_requests(
367359
- The function sets appropriate headers for GET and POST requests.
368360
"""
369361
service_url = f"{OGC_API_URL}/collections/{service}/items"
362+
370363
# Single parameters can only have one value
371364
single_params = {"datetime", "last_modified", "begin", "end", "time"}
372365

@@ -381,17 +374,12 @@ def _construct_api_requests(
381374
params = {k: v for k, v in kwargs.items() if k not in post_params}
382375
# Set skipGeometry parameter (API expects camelCase)
383376
params["skipGeometry"] = skip_geometry
384-
# If limit is none and max_results is not none, then set limit to max results. Otherwise,
385-
# if max_results is none, set it to 10000 (the API max).
377+
378+
# If limit is None or greater than 10000, then set limit to 10000 (the API maximum). Otherwise,
379+
# use the limit
386380
params["limit"] = (
387-
max_results if limit is None and max_results is not None else limit or 10000
388-
)
389-
# Add max results as a parameter if it is not None
390-
if max_results is not None:
391-
params["max_results"] = max_results
392-
393-
if max_results is not None and limit is not None and limit > max_results:
394-
raise ValueError("limit cannot be greater than max_result")
381+
10000 if limit is None or limit > 10000 else limit
382+
)
395383

396384
# Indicate if function needs to perform POST conversion
397385
POST = bool(post_params)
@@ -521,7 +509,6 @@ def _get_resp_data(resp: requests.Response, geopd: bool) -> pd.DataFrame:
521509
def _walk_pages(
522510
geopd: bool,
523511
req: requests.PreparedRequest,
524-
max_results: Optional[int],
525512
client: Optional[requests.Session] = None,
526513
) -> Tuple[pd.DataFrame, requests.Response]:
527514
"""
@@ -534,9 +521,6 @@ def _walk_pages(
534521
geometries.
535522
req : requests.PreparedRequest
536523
The initial HTTP request to send.
537-
max_results : Optional[int]
538-
Maximum number of rows to return. If None or NaN, retrieves all
539-
available pages.
540524
client : Optional[requests.Session], default None
541525
An optional HTTP client to use for requests. If not provided, a new
542526
client is created.
@@ -552,13 +536,6 @@ def _walk_pages(
552536
------
553537
Exception
554538
If a request fails or returns a non-200 status code.
555-
556-
Notes
557-
-----
558-
- If `max_results` is None or NaN, the function will continue to request
559-
subsequent pages until no more pages are available.
560-
- Failed requests are tracked and reported, but do not halt the entire
561-
process unless the initial request fails.
562539
"""
563540
logger.info("Requesting: %s", req.url)
564541

@@ -586,29 +563,25 @@ def _walk_pages(
586563
headers = dict(req.headers)
587564
content = req.body if method == "POST" else None
588565

589-
if max_results is None or pd.isna(max_results):
590-
dfs = _get_resp_data(resp, geopd=geopd)
591-
curr_url = _next_req_url(resp)
592-
while curr_url:
593-
try:
594-
resp = client.request(
595-
method,
596-
curr_url,
597-
headers=headers,
598-
data=content if method == "POST" else None,
566+
dfs = _get_resp_data(resp, geopd=geopd)
567+
curr_url = _next_req_url(resp)
568+
while curr_url:
569+
try:
570+
resp = client.request(
571+
method,
572+
curr_url,
573+
headers=headers,
574+
data=content if method == "POST" else None,
599575
)
600-
if resp.status_code != 200:
601-
raise Exception(_error_body(resp))
602-
df1 = _get_resp_data(resp, geopd=geopd)
603-
dfs = pd.concat([dfs, df1], ignore_index=True)
604-
curr_url = _next_req_url(resp)
605-
except Exception:
606-
logger.info("Request failed for URL: %s. Stopping pagination and data download.", curr_url)
607-
curr_url = None
608-
return dfs, initial_response
609-
else:
610-
resp.raise_for_status()
611-
return _get_resp_data(resp, geopd=geopd), initial_response
576+
if resp.status_code != 200:
577+
raise Exception(_error_body(resp))
578+
df1 = _get_resp_data(resp, geopd=geopd)
579+
dfs = pd.concat([dfs, df1], ignore_index=True)
580+
curr_url = _next_req_url(resp)
581+
except Exception:
582+
logger.info("Request failed for URL: %s. Stopping pagination and data download.", curr_url)
583+
curr_url = None
584+
return dfs, initial_response
612585
finally:
613586
if close_client:
614587
client.close()
@@ -742,14 +715,12 @@ def get_ogc_data(
742715
Notes
743716
-----
744717
- The function does not mutate the input `args` dictionary.
745-
- Handles optional arguments such as `max_results` and `convert_type`.
718+
- Handles optional arguments such as `convert_type`.
746719
- Applies column cleanup and reordering based on service and properties.
747720
"""
748721
args = args.copy()
749722
# Add service as an argument
750723
args["service"] = service
751-
# Pull out a max results input if exists
752-
max_results = args.pop("max_results", None)
753724
# Switch the input id to "id" if needed
754725
args = _switch_arg_id(args, id_name=output_id, service=service)
755726
properties = args.get("properties")
@@ -764,7 +735,7 @@ def get_ogc_data(
764735
req = _construct_api_requests(**args)
765736
# Run API request and iterate through pages if needed
766737
return_list, response = _walk_pages(
767-
geopd=GEOPANDAS, req=req, max_results=max_results
738+
geopd=GEOPANDAS, req=req
768739
)
769740
# Manage some aspects of the returned dataset
770741
return_list = _deal_with_empty(return_list, properties, service)

0 commit comments

Comments
 (0)