Skip to content

Commit 1669976

Browse files
Jing-yilinclaude
andcommitted
fix: force drop index and use DISTINCT ON for robust deduplication
Previous approach failed because: 1. All snapshot_date values were NULL, so IS NOT NULL check matched 0 rows 2. Existing index prevents deduplication from completing New strategy: 1. DROP INDEX IF EXISTS to allow deduplication without constraint violations 2. DELETE all rows with NULL snapshot_date (should not exist post-migration) 3. Use PostgreSQL DISTINCT ON for efficient deduplication: - Keeps row with MAX(snapshot_at) for each (campaign_pid, snapshot_date) pair - More efficient than self-join USING pattern 4. Let AutoMigrate recreate the index on clean data Logs showed "deduplicated 0 rows" because NULL != NULL in SQL. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 45a7094 commit 1669976

1 file changed

Lines changed: 19 additions & 12 deletions

File tree

backend/internal/db/db.go

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,28 @@ func Init(cfg *config.Config) error {
177177
return fmt.Errorf("pre-migrate campaign_snapshots columns: %w", err)
178178
}
179179

180+
// Drop existing unique index if it exists (from previous failed migrations).
181+
// This allows us to clean up duplicate data without constraint violations.
182+
DB.Exec(`DROP INDEX IF EXISTS idx_campaign_snapshot_date`)
183+
184+
// Delete all snapshots with NULL snapshot_date (should not exist after migration).
185+
deleteNull := DB.Exec(`DELETE FROM campaign_snapshots WHERE snapshot_date IS NULL`)
186+
log.Printf("DB init: deleted %d snapshots with NULL snapshot_date", deleteNull.RowsAffected)
187+
180188
// Deduplicate snapshots: keep only the latest per (campaign_pid, snapshot_date).
181-
// Run this EVERY time to handle legacy duplicate data from buggy upsert logic.
182-
// Must run BEFORE AutoMigrate attempts to create the unique index.
183-
result := DB.Exec(`
184-
DELETE FROM campaign_snapshots a USING campaign_snapshots b
185-
WHERE a.campaign_pid = b.campaign_pid
186-
AND a.snapshot_date IS NOT NULL
187-
AND b.snapshot_date IS NOT NULL
188-
AND a.snapshot_date = b.snapshot_date
189-
AND a.snapshot_at < b.snapshot_at
189+
// Use DISTINCT ON for efficiency - keeps row with MAX(snapshot_at) for each (pid, date) pair.
190+
dedup := DB.Exec(`
191+
DELETE FROM campaign_snapshots
192+
WHERE id NOT IN (
193+
SELECT DISTINCT ON (campaign_pid, snapshot_date) id
194+
FROM campaign_snapshots
195+
ORDER BY campaign_pid, snapshot_date, snapshot_at DESC
196+
)
190197
`)
191-
if result.Error != nil {
192-
return fmt.Errorf("deduplicate campaign_snapshots: %w", result.Error)
198+
if dedup.Error != nil {
199+
return fmt.Errorf("deduplicate campaign_snapshots: %w", dedup.Error)
193200
}
194-
log.Printf("DB init: deduplicated %d snapshot rows", result.RowsAffected)
201+
log.Printf("DB init: deduplicated %d snapshot rows", dedup.RowsAffected)
195202

196203
// NOW run AutoMigrate after all column renames are complete
197204
if err := DB.AutoMigrate(

0 commit comments

Comments
 (0)