Skip to content

Commit b29ed46

Browse files
committed
Set waterdata data types
1 parent 8f37748 commit b29ed46

1 file changed

Lines changed: 24 additions & 14 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -668,32 +668,40 @@ def _arrange_cols(
668668
return df.rename(columns={"id": output_id})
669669

670670

def _type_cols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Cast known water data columns to datetime, numeric, or categorical dtypes.

    Only columns that are actually present in ``df`` are touched; every
    other column is left as-is.

    Parameters
    ----------
    df : pd.DataFrame
        The input DataFrame containing water data. Recognized columns are
        reassigned on this object, so it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same DataFrame object, with the recognized columns re-typed.

    Notes
    -----
    - 'time', 'datetime' and 'last_modified' are parsed with
      ``pd.to_datetime``; values that cannot be parsed become ``NaT``.
    - 'value' and 'contributing_drainage_area' are parsed with
      ``pd.to_numeric``; values that cannot be parsed become ``NaN``.
    - 'approval_status', 'monitoring_location_id', 'parameter_code' and
      'unit_of_measure' are cast to the pandas ``category`` dtype.
    """

    def _as_datetime(series):
        return pd.to_datetime(series, errors="coerce")

    def _as_number(series):
        return pd.to_numeric(series, errors="coerce")

    def _as_category(series):
        return series.astype("category")

    # Column name -> converter. The three groups are disjoint, so each
    # column is converted at most once and the order of application does
    # not affect the result.
    converters = {
        "time": _as_datetime,
        "datetime": _as_datetime,
        "last_modified": _as_datetime,
        "value": _as_number,
        "contributing_drainage_area": _as_number,
        "approval_status": _as_category,
        "monitoring_location_id": _as_category,
        "parameter_code": _as_category,
        "unit_of_measure": _as_category,
    }

    present = set(df.columns)
    for name, convert in converters.items():
        if name in present:
            df[name] = convert(df[name])

    return df
698706

699707

@@ -749,8 +757,10 @@ def get_ogc_data(
749757
)
750758
# Manage some aspects of the returned dataset
751759
return_list = _deal_with_empty(return_list, properties, service)
760+
752761
if convert_type:
753-
return_list = _cleanup_cols(return_list, service=service)
762+
return_list = _type_cols(return_list)
763+
754764
return_list = _arrange_cols(return_list, properties, output_id)
755765
# Create metadata object from response
756766
metadata = BaseMetadata(response)

0 commit comments

Comments
 (0)