File tree Expand file tree Collapse file tree
src/mindlogger_data_export Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -108,7 +108,7 @@ class DeduplicateResponsesProcessor(ReportProcessor):
108108 """Deduplicate responses, keeping latest by "activity_end_time".
109109
110110 This processor removes duplicate item responses for the same
111- user/activity/submission combination, keeping only the most recent entry.
111+ user/activity/submission/item combination, keeping only the most recent entry.
112112 """
113113
114114 NAME = "DeduplicateResponses"
@@ -117,8 +117,13 @@ class DeduplicateResponsesProcessor(ReportProcessor):
117117
118118 def _run (self , report : pl .DataFrame ) -> pl .DataFrame :
119119 """Deduplicate report by keeping latest activity_end_time."""
120- # Define the columns that should be unique
121- unique_cols = ["target_user_secret_id" , "source_user_secret_id" , "activity_id" ]
120+ # Define the columns that should be unique per item
121+ unique_cols = [
122+ "target_user_secret_id" ,
123+ "source_user_secret_id" ,
124+ "activity_id" ,
125+ "item_id" ,
126+ ]
122127
123128 # Check which columns actually exist in the report
124129 existing_unique_cols = [col for col in unique_cols if col in report .columns ]
You can’t perform that action at this time.
0 commit comments