Skip to content

Commit cf0f4d7

Browse files
committed
change column ordering logic a little in case someone requests id
1 parent 5b42f0d commit cf0f4d7

1 file changed

Lines changed: 18 additions & 19 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -648,35 +648,34 @@ def _arrange_cols(
648648
pd.DataFrame or gpd.GeoDataFrame
649649
The DataFrame with columns rearranged and/or renamed according to the specified properties and output_id.
650650
"""
651+
652+
# Rename id column to output_id
653+
df = df.rename(columns={"id": output_id})
654+
655+
# If properties are provided, filter to only those columns
651656
if properties and not all(pd.isna(properties)):
652-
if "id" not in properties:
653-
# If user refers to service-specific output id in properties,
654-
# then rename the "id" column to the output_id (id column is
655-
# automatically included).
656-
if output_id in properties:
657-
df = df.rename(columns={"id": output_id})
658-
# If output id is not in properties, but user requests the plural
659-
# of the output_id (e.g. "monitoring_locations_id"), then rename
660-
# "id" to plural. This is pretty niche.
661-
else:
662-
plural = output_id.replace("_id", "s_id")
663-
if plural in properties:
664-
df = df.rename(columns={"id": plural})
657+
# id is technically a valid column from the service, but these
658+
# functions make the name more specific. So, if someone requests
659+
# 'id', give them the output_id column
660+
if 'id' in properties:
661+
properties[properties.index('id')] = output_id
665662
df = df.loc[:, [col for col in properties if col in df.columns]]
666-
else:
667-
df = df.rename(columns={"id": output_id})
668-
663+
669664
# Move meaningless-to-user, extra id columns to the end
670665
# of the dataframe, if they exist
671-
extra_id_cols = set(df.columns).intersection({
666+
extra_id_col = set(df.columns).intersection({
672667
"latest_continuous_id",
673668
"latest_daily_id",
674669
"daily_id",
675670
"continuous_id",
676671
"field_measurement_id"
677672
})
678-
if extra_id_cols:
679-
id_col_order = [col for col in df.columns if col not in extra_id_cols] + list(extra_id_cols)
673+
674+
# If the arbitrary id column is returned (either due to properties
675+
# being none or NaN), then move it to the end of the dataframe, but
676+
# if part of properties, keep in requested order
677+
if extra_id_col and properties is None or all(pd.isna(properties)):
678+
id_col_order = [col for col in df.columns if col not in extra_id_col] + list(extra_id_col)
680679
df = df.loc[:, id_col_order]
681680

682681
return df

0 commit comments

Comments
 (0)