Skip to content

Commit 5ed223f

Browse files
committed
👔 Use given indices where possible
1 parent f26210c commit 5ed223f

1 file changed

Lines changed: 50 additions & 29 deletions

File tree

src/mindlogger_data_export/outputs.py

Lines changed: 50 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -449,24 +449,15 @@ def _prepare_activity_columns(
449449
]
450450
)
451451

452-
# For non-text items, drop the `_response` columns
453-
# response_cols = [col for col in df.columns if col.endswith("_response")]
452+
# Handle text items uniquely
453+
response_cols = [col for col in df.columns if col.endswith("_response")]
454454
index_cols = [col for col in df.columns if col.endswith("_index")]
455-
# index_bases = {col.replace("_index", "") for col in index_cols}
456-
# text_item_response_cols = [
457-
# col
458-
# for col in response_cols
459-
# if col.replace("_response", "") not in index_bases
460-
# ]
461-
# df = df.select(
462-
# [
463-
# col
464-
# for col in df.columns
465-
# if not (
466-
# col.endswith("_response") and col not in text_item_response_cols
467-
# )
468-
# ]
469-
# )
455+
index_bases = {col.replace("_index", "") for col in index_cols}
456+
text_item_response_cols = [
457+
col
458+
for col in response_cols
459+
if col.replace("_response", "") not in index_bases
460+
]
470461

471462
# For items with `_index` but no `_score`, create `_score` from `_index`
472463
score_cols = [col for col in df.columns if col.endswith("_score")]
@@ -477,7 +468,7 @@ def _prepare_activity_columns(
477468
score_col = f"{base_name}_score"
478469
df = df.with_columns([pl.col(col).alias(score_col)])
479470

480-
# Drop multiselect response_options columns (they're redundant - all options share same list)
471+
# Drop multiselect response_options columns (they're redundant)
481472
df = df.select(
482473
[
483474
col
@@ -489,27 +480,57 @@ def _prepare_activity_columns(
489480
]
490481
)
491482

492-
# Create REDCap `_response` columns from `_index` for select items (`_index + 1`)
493-
# for col in index_cols:
494-
# response_col = col.replace("_index", "_response")
495-
# df = df.with_columns([(pl.col(col) + 1).alias(response_col)])
483+
# Create REDCap `_response` columns
484+
# If the original response value starts with a number, use that number; otherwise use index + 1
485+
for col in [_ for _ in index_cols if _ not in text_item_response_cols]:
486+
response_col = col.replace("_index", "_response")
487+
base_name = col.replace("_index", "")
488+
# Check if there's an existing response column with values that start with numbers
489+
original_response_col = f"{base_name}_response"
490+
if original_response_col in df.columns:
491+
# Try to extract leading number from response value, fall back to index + 1
492+
df = df.with_columns(
493+
[
494+
pl.when(
495+
pl.col(original_response_col)
496+
.cast(pl.Utf8)
497+
.str.extract(r"^(\d+)", 1)
498+
.is_not_null()
499+
)
500+
.then(
501+
pl.col(original_response_col)
502+
.cast(pl.Utf8)
503+
.str.extract(r"^(\d+)", 1)
504+
.cast(pl.Int64)
505+
)
506+
.otherwise(pl.col(col) + 1)
507+
.alias(response_col)
508+
]
509+
)
510+
else:
511+
# No original response column, use index + 1
512+
df = df.with_columns([(pl.col(col) + 1).alias(response_col)])
496513

497-
# Drop bare item columns that have _response versions
514+
# Drop bare item columns that have _response, _score, or _index versions
498515
response_bases = {
499516
col.replace("_response", "")
500517
for col in df.columns
501518
if col.endswith("_response")
502519
}
503-
df = df.select(
520+
score_bases = {
521+
col.replace("_score", "") for col in df.columns if col.endswith("_score")
522+
}
523+
index_bases = {
524+
col.replace("_index", "") for col in df.columns if col.endswith("_index")
525+
}
526+
527+
return df.select(
504528
[
505529
col
506530
for col in df.columns
507-
if not (col in response_bases and f"{col}_response" in df.columns)
531+
if col not in response_bases | score_bases | index_bases
508532
]
509-
)
510-
511-
# Drop response metadata columns
512-
return df.select(
533+
).select(
513534
[
514535
col
515536
for col in df.columns

0 commit comments

Comments
 (0)