@@ -108,46 +108,48 @@ def _map_response_column_names(cname: str) -> str:
108108 return "_" .join ([parts [1 ], parts [0 ].removeprefix ("response" )])
109109
110110 @staticmethod
111- def _fill_item_response (* response_columns : str ) -> Generator [pl .Expr , None , None ]:
112- for response_col in response_columns :
113- yield (
114- pl .when (pl .col (response_col ).is_null ())
115- .then (pl .col (f"{ response_col } __response" ))
116- .otherwise (pl .col (response_col ))
117- .alias (response_col )
118- )
111+ def _fill_item_response (* null_score_columns : str ) -> Generator [pl .Expr , None , None ]:
112+ for col in null_score_columns :
113+ yield pl .col (f"{ col } __response" ).alias (col )
119114
120115 @staticmethod
121116 def _pivot_singleselect (
122117 df : pl .DataFrame , option_scores : pl .DataFrame
123118 ) -> pl .DataFrame :
124- # Score single select responses.
125- response_options = option_scores .with_columns (
126- pl .col ("item_option_value" ).alias ("response_item" ),
127- pl .col ("item_option_score" ).alias ("response_score" ),
128- pl .col ("item_option_name" ).alias ("response_response" ),
129- ).drop ("item_option_score" , "item_option_value" , "item_option_name" )
119+ # Rename columns in scores table.
120+ response_options = option_scores .rename (
121+ {
122+ "item_option_value" : "response_index" ,
123+ "item_option_score" : "response_score" ,
124+ "item_option_name" : "response_response" ,
125+ }
126+ )
130127
131128 df = (
129+ # Extract value of response.
132130 df .with_columns (
133- response_item = pl .col ("response_value" ).struct .field ("single_value" )
131+ response_index = pl .col ("response_value" ).struct .field ("single_value" )
134132 )
135133 .drop ("response_value" )
134+ # Join to score responses.
136135 .join (
137136 response_options ,
138137 on = [
139138 "applet_version" ,
140139 "activity_flow" ,
141140 "activity" ,
142141 "item" ,
143- "response_item " ,
142+ "response_index " ,
144143 ],
145144 how = "left" ,
146145 validate = "m:1" ,
147146 )
147+ # Extract item name for pivot.
148148 .with_columns (item_name = pl .col ("item" ).struct .field ("name" ))
149149 .drop ("item" )
150+ # Pivot on item_name producing 3 columns for each item.
150151 .pivot (on = "item_name" , values = cs .starts_with ("response" ), separator = "__" )
152+ # Rename pivoted columns to <QUESTION>__<field> form (e.g. <QUESTION>__score).
151153 .with_columns (
152154 cs .starts_with ("response" ).name .map (
153155 WideFormat ._map_response_column_names
@@ -156,18 +158,18 @@ def _pivot_singleselect(
156158 .drop (cs .starts_with ("response" ))
157159 )
158160
159- response_columns = {
161+ # Map each <QUESTION>__score column to the bare name of its item.
162+ score_columns = {
160163 s : s .rsplit ("__" )[0 ]
161164 for s in cs .expand_selector (df , cs .ends_with ("__score" ))
162165 }
163- return (
164- df .rename (
165- response_columns
166- ).with_columns ( # Rename <QUESTION>__score to <QUESTION>.
167- WideFormat ._fill_item_response (* response_columns .values ())
168- ) # Use value of __response if __score is null.
169- # .drop(cs.ends_with("__item"))
170- )
166+ # Rename <QUESTION>__score to <QUESTION>.
167+ df = df .rename (score_columns )
168+ null_score_columns = {
169+ col for col in score_columns .values () if df [col ].is_null ().all ()
170+ }
171+ # Fill <QUESTION> columns that are entirely null with the value of <QUESTION>__response.
172+ return df .with_columns (WideFormat ._fill_item_response (* null_score_columns ))
171173
172174 @staticmethod
173175 def _pivot_text (df : pl .DataFrame , option_scores : pl .DataFrame ) -> pl .DataFrame :
0 commit comments