@@ -449,24 +449,15 @@ def _prepare_activity_columns(
449449 ]
450450 )
451451
452- # For non- text items, drop the `_response` columns
453- # response_cols = [col for col in df.columns if col.endswith("_response")]
452+ # Identify text-item response columns: `_response` columns with no matching `_index` column
453+ response_cols = [col for col in df .columns if col .endswith ("_response" )]
454454 index_cols = [col for col in df .columns if col .endswith ("_index" )]
455- # index_bases = {col.replace("_index", "") for col in index_cols}
456- # text_item_response_cols = [
457- # col
458- # for col in response_cols
459- # if col.replace("_response", "") not in index_bases
460- # ]
461- # df = df.select(
462- # [
463- # col
464- # for col in df.columns
465- # if not (
466- # col.endswith("_response") and col not in text_item_response_cols
467- # )
468- # ]
469- # )
455+ index_bases = {col .replace ("_index" , "" ) for col in index_cols }
456+ text_item_response_cols = [
457+ col
458+ for col in response_cols
459+ if col .replace ("_response" , "" ) not in index_bases
460+ ]
470461
471462 # For items with `_index` but no `_score`, create `_score` from `_index`
472463 score_cols = [col for col in df .columns if col .endswith ("_score" )]
@@ -477,7 +468,7 @@ def _prepare_activity_columns(
477468 score_col = f"{ base_name } _score"
478469 df = df .with_columns ([pl .col (col ).alias (score_col )])
479470
480- # Drop multiselect response_options columns (they're redundant - all options share same list )
471+ # Drop multiselect response_options columns (they're redundant)
481472 df = df .select (
482473 [
483474 col
@@ -489,27 +480,57 @@ def _prepare_activity_columns(
489480 ]
490481 )
491482
492- # Create REDCap `_response` columns from `_index` for select items (`_index + 1`)
493- # for col in index_cols:
494- # response_col = col.replace("_index", "_response")
495- # df = df.with_columns([(pl.col(col) + 1).alias(response_col)])
483+ # Create REDCap `_response` columns
484+ # If the original response value starts with a number, use that number; otherwise use index + 1
485+ for col in [_ for _ in index_cols if _ not in text_item_response_cols ]:
486+ response_col = col .replace ("_index" , "_response" )
487+ base_name = col .replace ("_index" , "" )
488+ # Check whether this item already has a `_response` column (the leading-number test is applied per value below)
489+ original_response_col = f"{ base_name } _response"
490+ if original_response_col in df .columns :
491+ # Try to extract leading number from response value, fall back to index + 1
492+ df = df .with_columns (
493+ [
494+ pl .when (
495+ pl .col (original_response_col )
496+ .cast (pl .Utf8 )
497+ .str .extract (r"^(\d+)" , 1 )
498+ .is_not_null ()
499+ )
500+ .then (
501+ pl .col (original_response_col )
502+ .cast (pl .Utf8 )
503+ .str .extract (r"^(\d+)" , 1 )
504+ .cast (pl .Int64 )
505+ )
506+ .otherwise (pl .col (col ) + 1 )
507+ .alias (response_col )
508+ ]
509+ )
510+ else :
511+ # No original response column, use index + 1
512+ df = df .with_columns ([(pl .col (col ) + 1 ).alias (response_col )])
496513
497- # Drop bare item columns that have _response versions
514+ # Drop bare item columns that have _response, _score, or _index versions
498515 response_bases = {
499516 col .replace ("_response" , "" )
500517 for col in df .columns
501518 if col .endswith ("_response" )
502519 }
503- df = df .select (
520+ score_bases = {
521+ col .replace ("_score" , "" ) for col in df .columns if col .endswith ("_score" )
522+ }
523+ index_bases = {
524+ col .replace ("_index" , "" ) for col in df .columns if col .endswith ("_index" )
525+ }
526+
527+ return df .select (
504528 [
505529 col
506530 for col in df .columns
507- if not ( col in response_bases and f" { col } _response" in df . columns )
531+ if col not in response_bases | score_bases | index_bases
508532 ]
509- )
510-
511- # Drop response metadata columns
512- return df .select (
533+ ).select (
513534 [
514535 col
515536 for col in df .columns
0 commit comments