Skip to content

Commit 0ba23a1

Browse files
committed
🐛 Fix multiselect options columns
1 parent 21d8578 commit 0ba23a1

1 file changed

Lines changed: 29 additions & 27 deletions

File tree

src/mindlogger_data_export/outputs.py

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -96,23 +96,14 @@ def __init__(self, *args, include_options: bool = False, **kwargs) -> None:
9696

9797
@staticmethod
9898
def _pivot_multiselect(
99-
df: pl.DataFrame, option_scores: pl.DataFrame, *, include_options: bool = False
99+
df: pl.DataFrame,
100+
option_scores: pl.DataFrame,
101+
*,
102+
include_options: bool = False, # noqa: ARG004
100103
) -> pl.DataFrame:
101104
del option_scores
102-
item_options_map: pl.DataFrame = pl.DataFrame()
103-
# Extract `response_options` before exploding (all options share the same list)
104-
if include_options:
105-
# Get unique `response_options` per item (before exploding)
106-
item_options_map = df.select(
107-
[
108-
pl.col("item").struct.field("name").alias("item_name"),
109-
pl.col("item")
110-
.struct.field("response_options")
111-
.alias("response_options"),
112-
]
113-
).unique(subset=["item_name"])
114105

115-
df = (
106+
return (
116107
df.with_columns(item_option=pl.col("item").struct.field("response_options"))
117108
.explode("item_option")
118109
# Generate value column indicating presence of response.
@@ -136,17 +127,6 @@ def _pivot_multiselect(
136127
)
137128
)
138129

139-
# Join back the `response_options` for each item
140-
if include_options:
141-
for row in item_options_map.iter_rows(named=True):
142-
item_name = row["item_name"]
143-
options_col = f"{item_name}_options"
144-
df = df.with_columns(
145-
[pl.lit(row["response_options"]).alias(options_col)]
146-
)
147-
148-
return df
149-
150130
@staticmethod
151131
def _map_response_column_names(cname: str) -> str:
152132
parts = cname.split("__", 1)
@@ -213,7 +193,12 @@ def _pivot_singleselect(
213193
]
214194

215195
df = (
216-
df.pivot(on="item_name", values=pivot_values, separator="__")
196+
df.pivot(
197+
on="item_name",
198+
values=pivot_values,
199+
separator="__",
200+
aggregate_function="first" if include_options else None,
201+
)
217202
# Rename pivoted columns
218203
.with_columns(
219204
cs.starts_with("response").name.map(
@@ -293,6 +278,7 @@ def _pivot_subscale(
293278
on="item_name",
294279
values=pivot_values,
295280
separator="__",
281+
aggregate_function="first" if include_options else None,
296282
).rename(
297283
lambda s: s.removesuffix("__response_response")
298284
if s.endswith("__response_response")
@@ -435,7 +421,11 @@ def _prepare_activity_columns(
435421
)
436422

437423
# Stringify `_options` columns
438-
options_cols = [col for col in df.columns if col.endswith("_options")]
424+
options_cols = [
425+
col
426+
for col in df.columns
427+
if col.endswith("_options") or "_response_options_" in col
428+
]
439429
for col in options_cols:
440430
df = df.with_columns(
441431
[
@@ -476,6 +466,18 @@ def _prepare_activity_columns(
476466
score_col = f"{base_name}_score"
477467
df = df.with_columns([pl.col(col).alias(score_col)])
478468

469+
# Drop multiselect response_options columns (they're redundant - all options share same list)
470+
df = df.select(
471+
[
472+
col
473+
for col in df.columns
474+
if not (
475+
"_response_options_" in col
476+
and col.split("_response_options_")[-1].split("_")[-1].isdigit()
477+
)
478+
]
479+
)
480+
479481
# Create REDCap `_response` columns from `_index` for select items (`_index + 1`)
480482
for col in index_cols:
481483
response_col = col.replace("_index", "_response")

0 commit comments

Comments
 (0)