Skip to content

Commit fd35362

Browse files
committed
Merge branch 'main' into add-continuous
2 parents d5d5788 + 237ed81 commit fd35362

4 files changed

Lines changed: 52 additions & 57699 deletions

File tree

README.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ location for water year 2025, where a "/" between two dates in the "time"
6666
input argument indicates a desired date range:
6767

6868
```python
69-
import dataretrieval.waterdata as waterdata
69+
from dataretrieval import waterdata
7070

7171
# Get daily streamflow data (returns DataFrame and metadata)
7272
df, metadata = waterdata.get_daily(
@@ -130,7 +130,7 @@ logging.basicConfig(filename='waterdata.log', level=logging.INFO)
130130
The `nwis` module accesses legacy NWIS Water Services:
131131

132132
```python
133-
import dataretrieval.nwis as nwis
133+
from dataretrieval import nwis
134134

135135
# Get site information
136136
info, metadata = nwis.get_info(sites='01646500')
@@ -153,7 +153,7 @@ print(f"Retrieved {len(dv)} daily values")
153153
Access water quality data from multiple agencies:
154154

155155
```python
156-
import dataretrieval.wqp as wqp
156+
from dataretrieval import wqp
157157

158158
# Find water quality monitoring sites
159159
sites = wqp.what_sites(
@@ -177,7 +177,7 @@ print(f"Retrieved {len(results)} temperature measurements")
177177
Discover and navigate hydrologic networks:
178178

179179
```python
180-
import dataretrieval.nldi as nldi
180+
from dataretrieval import nldi
181181

182182
# Get watershed basin for a stream reach
183183
basin = nldi.get_basin(

dataretrieval/waterdata/api.py

Lines changed: 13 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -161,12 +161,11 @@ def get_daily(
161161
limit : numeric, optional
162162
The optional limit parameter is used to control the subset of the
163163
selected features that should be returned in each page. The maximum
164-
allowable limit is 10000. It may be beneficial to set this number lower
164+
allowable limit is 50000. It may be beneficial to set this number lower
165165
if your internet connection is spotty. The default (None) will set the
166166
limit to the maximum allowable limit for the service.
167167
convert_type : boolean, optional
168-
If True, the function will convert the data to dates and qualifier to
169-
string vector
168+
If True, converts columns to appropriate types.
170169
171170
Returns
172171
-------
@@ -632,14 +631,16 @@ def get_monitoring_locations(
632631
limit : numeric, optional
633632
The optional limit parameter is used to control the subset of the
634633
selected features that should be returned in each page. The maximum
635-
allowable limit is 10000. It may be beneficial to set this number lower
634+
allowable limit is 50000. It may be beneficial to set this number lower
636635
if your internet connection is spotty. The default (None) will set the
637636
limit to the maximum allowable limit for the service.
638637
skip_geometry : boolean, optional
639638
This option can be used to skip response geometries for each feature.
640639
The returning object will be a data frame with no spatial information.
641640
Note that the USGS Water Data APIs use camelCase "skipGeometry" in
642641
CQL2 queries.
642+
convert_type : boolean, optional
643+
If True, converts columns to appropriate types.
643644
644645
Returns
645646
-------
@@ -827,12 +828,11 @@ def get_time_series_metadata(
827828
limit : numeric, optional
828829
The optional limit parameter is used to control the subset of the
829830
selected features that should be returned in each page. The maximum
830-
allowable limit is 10000. It may be beneficial to set this number lower
831+
allowable limit is 50000. It may be beneficial to set this number lower
831832
if your internet connection is spotty. The default (None) will set the
832833
limit to the maximum allowable limit for the service.
833834
convert_type : boolean, optional
834-
If True, the function will convert the data to dates and qualifier to
835-
string vector
835+
If True, converts columns to appropriate types.
836836
837837
Returns
838838
-------
@@ -1003,12 +1003,11 @@ def get_latest_continuous(
10031003
limit : numeric, optional
10041004
The optional limit parameter is used to control the subset of the
10051005
selected features that should be returned in each page. The maximum
1006-
allowable limit is 10000. It may be beneficial to set this number lower
1006+
allowable limit is 50000. It may be beneficial to set this number lower
10071007
if your internet connection is spotty. The default (None) will set the
10081008
limit to the maximum allowable limit for the service.
10091009
convert_type : boolean, optional
1010-
If True, the function will convert the data to dates and qualifier to
1011-
string vector
1010+
If True, converts columns to appropriate types.
10121011
10131012
Returns
10141013
-------
@@ -1178,12 +1177,11 @@ def get_latest_daily(
11781177
limit : numeric, optional
11791178
The optional limit parameter is used to control the subset of the
11801179
selected features that should be returned in each page. The maximum
1181-
allowable limit is 10000. It may be beneficial to set this number lower
1180+
allowable limit is 50000. It may be beneficial to set this number lower
11821181
if your internet connection is spotty. The default (None) will set the
11831182
limit to the maximum allowable limit for the service.
11841183
convert_type : boolean, optional
1185-
If True, the function will convert the data to dates and qualifier to
1186-
string vector
1184+
If True, converts columns to appropriate types.
11871185
11881186
Returns
11891187
-------
@@ -1344,12 +1342,11 @@ def get_field_measurements(
13441342
limit : numeric, optional
13451343
The optional limit parameter is used to control the subset of the
13461344
selected features that should be returned in each page. The maximum
1347-
allowable limit is 10000. It may be beneficial to set this number lower
1345+
allowable limit is 50000. It may be beneficial to set this number lower
13481346
if your internet connection is spotty. The default (None) will set the
13491347
limit to the maximum allowable limit for the service.
13501348
convert_type : boolean, optional
1351-
If True, the function will convert the data to dates and qualifier to
1352-
string vector
1349+
If True, converts columns to appropriate types.
13531350
13541351
Returns
13551352
-------

dataretrieval/waterdata/utils.py

Lines changed: 35 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -382,10 +382,10 @@ def _construct_api_requests(
382382
# Set skipGeometry parameter (API expects camelCase)
383383
params["skipGeometry"] = skip_geometry
384384

385-
# If limit is none or greater than 10000, then set limit to max results. Otherwise,
385+
# If limit is none or greater than 50000, then set limit to max results. Otherwise,
386386
# use the limit
387387
params["limit"] = (
388-
10000 if limit is None or limit > 10000 else limit
388+
50000 if limit is None or limit > 50000 else limit
389389
)
390390

391391
# Indicate if function needs to perform POST conversion
@@ -667,32 +667,48 @@ def _arrange_cols(
667667
return df.rename(columns={"id": output_id})
668668

669669

670-
def _cleanup_cols(df: pd.DataFrame, service: str = "daily") -> pd.DataFrame:
670+
def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
671671
"""
672-
Cleans and standardizes columns in a pandas DataFrame for water data endpoints.
672+
Casts columns into appropriate types.
673673
674674
Parameters
675675
----------
676676
df : pd.DataFrame
677677
The input DataFrame containing water data.
678-
service : str, optional
679-
The type of water data service (default is "daily").
680678
681679
Returns
682680
-------
683681
pd.DataFrame
684-
The cleaned DataFrame with standardized columns.
682+
The DataFrame with columns cast to appropriate types.
685683
686-
Notes
687-
-----
688-
- If the 'time' column exists and service is "daily", it is converted to date objects.
689-
- The 'value' and 'contributing_drainage_area' columns are coerced to numeric types.
690684
"""
691-
if "time" in df.columns and service == "daily":
692-
df["time"] = pd.to_datetime(df["time"]).dt.date
693-
for col in ["value", "contributing_drainage_area"]:
694-
if col in df.columns:
695-
df[col] = pd.to_numeric(df[col], errors="coerce")
685+
cols = set(df.columns)
686+
numerical_cols = [
687+
"altitude",
688+
"altitude_accuracy",
689+
"contributing_drainage_area",
690+
"drainage_area",
691+
"hole_constructed_depth",
692+
"value",
693+
"well_constructed_depth",
694+
]
695+
time_cols = [
696+
"begin",
697+
"begin_utc",
698+
"construction_date",
699+
"end",
700+
"end_utc",
701+
"datetime", # unused
702+
"last_modified",
703+
"time",
704+
]
705+
706+
for col in cols.intersection(time_cols):
707+
df[col] = pd.to_datetime(df[col], errors="coerce")
708+
709+
for col in cols.intersection(numerical_cols):
710+
df[col] = pd.to_numeric(df[col], errors="coerce")
711+
696712
return df
697713

698714

@@ -748,8 +764,10 @@ def get_ogc_data(
748764
)
749765
# Manage some aspects of the returned dataset
750766
return_list = _deal_with_empty(return_list, properties, service)
767+
751768
if convert_type:
752-
return_list = _cleanup_cols(return_list, service=service)
769+
return_list = _type_cols(return_list)
770+
753771
return_list = _arrange_cols(return_list, properties, output_id)
754772
# Create metadata object from response
755773
metadata = BaseMetadata(response)

0 commit comments

Comments
 (0)