Skip to content

Commit 5583ead

Browse files
committed
Add _response and _item columns back in
1 parent a69332c commit 5583ead

1 file changed

Lines changed: 42 additions & 15 deletions

File tree

src/mindlogger_data_export/outputs.py

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def _fill_item_response(*response_columns: str) -> Generator[pl.Expr, None, None
112112
for response_col in response_columns:
113113
yield (
114114
pl.when(pl.col(response_col).is_null())
115-
.then(pl.col(f"{response_col}__name"))
115+
.then(pl.col(f"{response_col}__response"))
116116
.otherwise(pl.col(response_col))
117117
.alias(response_col)
118118
)
@@ -123,14 +123,14 @@ def _pivot_singleselect(
123123
) -> pl.DataFrame:
124124
# Score single select responses.
125125
response_options = option_scores.with_columns(
126-
pl.col("item_option_value").alias("response_index"),
126+
pl.col("item_option_value").alias("response_item"),
127127
pl.col("item_option_score").alias("response_score"),
128-
pl.col("item_option_name").alias("response_name"),
128+
pl.col("item_option_name").alias("response_response"),
129129
).drop("item_option_score", "item_option_value", "item_option_name")
130130

131131
df = (
132132
df.with_columns(
133-
response_index=pl.col("response_value").struct.field("single_value")
133+
response_item=pl.col("response_value").struct.field("single_value")
134134
)
135135
.drop("response_value")
136136
.join(
@@ -140,7 +140,7 @@ def _pivot_singleselect(
140140
"activity_flow",
141141
"activity",
142142
"item",
143-
"response_index",
143+
"response_item",
144144
],
145145
how="left",
146146
validate="m:1",
@@ -161,11 +161,12 @@ def _pivot_singleselect(
161161
for s in cs.expand_selector(df, cs.ends_with("__score"))
162162
}
163163
return (
164-
df.rename(response_columns) # Rename <QUESTION>__score to <QUESTION>.
165-
.with_columns(
164+
df.rename(
165+
response_columns
166+
).with_columns( # The rename above maps <QUESTION>__score to <QUESTION>.
166167
WideFormat._fill_item_response(*response_columns.values())
167-
) # Use value of __name if __score is null.
168-
.drop(cs.ends_with("__index", "__name"))
168+
) # Use value of __response if __score is null.
169+
# .drop(cs.ends_with("__item"))
169170
)
170171

171172
@staticmethod
@@ -176,8 +177,20 @@ def _pivot_text(df: pl.DataFrame, option_scores: pl.DataFrame) -> pl.DataFrame:
176177
response_value=pl.col("response_value").struct.field("text"),
177178
item_name=pl.col("item").struct.field("name"),
178179
)
180+
.with_columns(response_response=pl.col("response_value"))
179181
.drop("item")
180-
.pivot(on="item_name", values="response_value")
182+
.pivot(
183+
on="item_name",
184+
values=["response_value", "response_response"],
185+
separator="__",
186+
)
187+
.rename(
188+
lambda s: s.removesuffix("__response_response")
189+
if s.endswith("__response_response")
190+
else s.removesuffix("_value")
191+
if s.endswith("__response_value")
192+
else s
193+
)
181194
)
182195

183196
@staticmethod
@@ -188,8 +201,20 @@ def _pivot_subscale(df: pl.DataFrame, option_scores: pl.DataFrame) -> pl.DataFra
188201
response_value=pl.col("response_value").struct.field("subscale"),
189202
item_name=pl.col("item").struct.field("name"),
190203
)
204+
.with_columns(response_response=pl.col("response_value"))
191205
.drop("item")
192-
.pivot(on="item_name", values="response_value")
206+
.pivot(
207+
on="item_name",
208+
values=["response_value", "response_response"],
209+
separator="__",
210+
)
211+
.rename(
212+
lambda s: s.removesuffix("__response_response")
213+
if s.endswith("__response_response")
214+
else s.removesuffix("_value")
215+
if s.endswith("__response_value")
216+
else s
217+
)
193218
)
194219

195220
PIVOT_FNS = {
@@ -230,8 +255,7 @@ def _typed_pivot(
230255
item_type=pl.col("item").struct.field("type")
231256
).partition_by("item_type", include_key=False, as_dict=True) # type: ignore
232257

233-
# Multiple Selection
234-
# Convert multiselect into one-hot encoding.
258+
# Pivot each partition using the function selected for its item type.
235259
pivoted_dfs = [
236260
self._get_pivot_fn(partition_type)(partition_df, option_scores)
237261
for partition_type, partition_df in typed_partitions.items()
@@ -258,6 +282,9 @@ def _typed_pivot(
258282
return df.select(idx_columns, response_columns)
259283

260284
def _format(self, data: MindloggerData) -> list[NamedOutput]:
285+
ml_report = data.report.with_columns(
286+
utc_timezone_offset=pl.col("utc_timezone_offset").dt.to_string("iso")
287+
)
261288
if (
262289
"split_activities" in self._extra
263290
and self._extra["split_activities"].lower() == "true"
@@ -267,7 +294,7 @@ def _format(self, data: MindloggerData) -> list[NamedOutput]:
267294
f"{activity[1]}",
268295
self._typed_pivot(activity_df, data.item_response_options),
269296
)
270-
for activity, activity_df in data.report.with_columns(
297+
for activity, activity_df in ml_report.with_columns(
271298
activity_id=pl.col("activity").struct.field("id"),
272299
activity_name=pl.col("activity").struct.field("name"),
273300
)
@@ -279,7 +306,7 @@ def _format(self, data: MindloggerData) -> list[NamedOutput]:
279306

280307
return [
281308
NamedOutput(
282-
"wide_data", self._typed_pivot(data.report, data.item_response_options)
309+
"wide_data", self._typed_pivot(ml_report, data.item_response_options)
283310
)
284311
]
285312

0 commit comments

Comments
 (0)