Skip to content

Commit 3d4f89e

Browse files
committed
Update Wide Format output to conform to HBN Spec
1 parent 5583ead commit 3d4f89e

1 file changed

Lines changed: 27 additions & 25 deletions

File tree

src/mindlogger_data_export/outputs.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -108,46 +108,48 @@ def _map_response_column_names(cname: str) -> str:
108108
return "_".join([parts[1], parts[0].removeprefix("response")])
109109

110110
@staticmethod
111-
def _fill_item_response(*response_columns: str) -> Generator[pl.Expr, None, None]:
112-
for response_col in response_columns:
113-
yield (
114-
pl.when(pl.col(response_col).is_null())
115-
.then(pl.col(f"{response_col}__response"))
116-
.otherwise(pl.col(response_col))
117-
.alias(response_col)
118-
)
111+
def _fill_item_response(*null_score_columns: str) -> Generator[pl.Expr, None, None]:
112+
for col in null_score_columns:
113+
yield pl.col(f"{col}__response").alias(col)
119114

120115
@staticmethod
121116
def _pivot_singleselect(
122117
df: pl.DataFrame, option_scores: pl.DataFrame
123118
) -> pl.DataFrame:
124-
# Score single select responses.
125-
response_options = option_scores.with_columns(
126-
pl.col("item_option_value").alias("response_item"),
127-
pl.col("item_option_score").alias("response_score"),
128-
pl.col("item_option_name").alias("response_response"),
129-
).drop("item_option_score", "item_option_value", "item_option_name")
119+
# Rename columns in scores table.
120+
response_options = option_scores.rename(
121+
{
122+
"item_option_value": "response_index",
123+
"item_option_score": "response_score",
124+
"item_option_name": "response_response",
125+
}
126+
)
130127

131128
df = (
129+
# Extract value of response.
132130
df.with_columns(
133-
response_item=pl.col("response_value").struct.field("single_value")
131+
response_index=pl.col("response_value").struct.field("single_value")
134132
)
135133
.drop("response_value")
134+
# Join to score responses.
136135
.join(
137136
response_options,
138137
on=[
139138
"applet_version",
140139
"activity_flow",
141140
"activity",
142141
"item",
143-
"response_item",
142+
"response_index",
144143
],
145144
how="left",
146145
validate="m:1",
147146
)
147+
# Extract item name for pivot.
148148
.with_columns(item_name=pl.col("item").struct.field("name"))
149149
.drop("item")
150+
# Pivot on item_name producing 3 columns for each item.
150151
.pivot(on="item_name", values=cs.starts_with("response"), separator="__")
152+
# Rename pivoted columns to <QUESTION>__<field> (e.g. <QUESTION>__score, <QUESTION>__response).
151153
.with_columns(
152154
cs.starts_with("response").name.map(
153155
WideFormat._map_response_column_names
@@ -156,18 +158,18 @@ def _pivot_singleselect(
156158
.drop(cs.starts_with("response"))
157159
)
158160

159-
response_columns = {
161+
# Rename score columns to bare name of item.
162+
score_columns = {
160163
s: s.rsplit("__")[0]
161164
for s in cs.expand_selector(df, cs.ends_with("__score"))
162165
}
163-
return (
164-
df.rename(
165-
response_columns
166-
).with_columns( # Rename <QUESTION>__score to <QUESTION>.
167-
WideFormat._fill_item_response(*response_columns.values())
168-
) # Use value of __response if __score is null.
169-
# .drop(cs.ends_with("__item"))
170-
)
166+
# Rename <QUESTION>__score to <QUESTION>.
167+
df = df.rename(score_columns)
168+
null_score_columns = {
169+
col for col in score_columns.values() if df[col].is_null().all()
170+
}
171+
# Fill null <QUESTION> columns with value of <QUESTION>__response.
172+
return df.with_columns(WideFormat._fill_item_response(*null_score_columns))
171173

172174
@staticmethod
173175
def _pivot_text(df: pl.DataFrame, option_scores: pl.DataFrame) -> pl.DataFrame:

0 commit comments

Comments
 (0)