@@ -96,23 +96,14 @@ def __init__(self, *args, include_options: bool = False, **kwargs) -> None:
9696
9797 @staticmethod
9898 def _pivot_multiselect (
99- df : pl .DataFrame , option_scores : pl .DataFrame , * , include_options : bool = False
99+ df : pl .DataFrame ,
100+ option_scores : pl .DataFrame ,
101+ * ,
102+ include_options : bool = False , # noqa: ARG004
100103 ) -> pl .DataFrame :
101104 del option_scores
102- item_options_map : pl .DataFrame = pl .DataFrame ()
103- # Extract `response_options` before exploding (all options share the same list)
104- if include_options :
105- # Get unique `response_options` per item (before exploding)
106- item_options_map = df .select (
107- [
108- pl .col ("item" ).struct .field ("name" ).alias ("item_name" ),
109- pl .col ("item" )
110- .struct .field ("response_options" )
111- .alias ("response_options" ),
112- ]
113- ).unique (subset = ["item_name" ])
114105
115- df = (
106+ return (
116107 df .with_columns (item_option = pl .col ("item" ).struct .field ("response_options" ))
117108 .explode ("item_option" )
118109 # Generate value column indicating presence of response.
@@ -136,17 +127,6 @@ def _pivot_multiselect(
136127 )
137128 )
138129
139- # Join back the `response_options` for each item
140- if include_options :
141- for row in item_options_map .iter_rows (named = True ):
142- item_name = row ["item_name" ]
143- options_col = f"{ item_name } _options"
144- df = df .with_columns (
145- [pl .lit (row ["response_options" ]).alias (options_col )]
146- )
147-
148- return df
149-
150130 @staticmethod
151131 def _map_response_column_names (cname : str ) -> str :
152132 parts = cname .split ("__" , 1 )
@@ -213,7 +193,12 @@ def _pivot_singleselect(
213193 ]
214194
215195 df = (
216- df .pivot (on = "item_name" , values = pivot_values , separator = "__" )
196+ df .pivot (
197+ on = "item_name" ,
198+ values = pivot_values ,
199+ separator = "__" ,
200+ aggregate_function = "first" if include_options else None ,
201+ )
217202 # Rename pivoted columns
218203 .with_columns (
219204 cs .starts_with ("response" ).name .map (
@@ -293,6 +278,7 @@ def _pivot_subscale(
293278 on = "item_name" ,
294279 values = pivot_values ,
295280 separator = "__" ,
281+ aggregate_function = "first" if include_options else None ,
296282 ).rename (
297283 lambda s : s .removesuffix ("__response_response" )
298284 if s .endswith ("__response_response" )
@@ -435,7 +421,11 @@ def _prepare_activity_columns(
435421 )
436422
437423 # Stringify `_options` columns
438- options_cols = [col for col in df .columns if col .endswith ("_options" )]
424+ options_cols = [
425+ col
426+ for col in df .columns
427+ if col .endswith ("_options" ) or "_response_options_" in col
428+ ]
439429 for col in options_cols :
440430 df = df .with_columns (
441431 [
@@ -476,6 +466,18 @@ def _prepare_activity_columns(
476466 score_col = f"{ base_name } _score"
477467 df = df .with_columns ([pl .col (col ).alias (score_col )])
478468
469+ # Drop multiselect response_options columns (they're redundant - all options share same list)
470+ df = df .select (
471+ [
472+ col
473+ for col in df .columns
474+ if not (
475+ "_response_options_" in col
476+ and col .split ("_response_options_" )[- 1 ].split ("_" )[- 1 ].isdigit ()
477+ )
478+ ]
479+ )
480+
479481 # Create REDCap `_response` columns from `_index` for select items (`_index + 1`)
480482 for col in index_cols :
481483 response_col = col .replace ("_index" , "_response" )
0 commit comments