@@ -157,40 +157,62 @@ def _worker_quantize(
 
     client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url)
     try:
-        # Phase 1: Dump originals to backup shard
-        backup = VectorBackup.create(
-            path=backup_path,
-            index_name=index_name,
-            fields=datatype_changes,
-            batch_size=batch_size,
-        )
+        # Try to resume from existing backup shard first
+        backup = VectorBackup.load(backup_path)
+        if backup is not None:
+            logger.info(
+                "Worker %d: resuming from existing backup (phase=%s, "
+                "dump_batches=%d, quantize_batches=%d)",
+                worker_id,
+                backup.header.phase,
+                backup.header.dump_completed_batches,
+                backup.header.quantize_completed_batches,
+            )
+        else:
+            backup = VectorBackup.create(
+                path=backup_path,
+                index_name=index_name,
+                fields=datatype_changes,
+                batch_size=batch_size,
+            )
 
         total = len(keys)
-        for batch_start in range(0, total, batch_size):
-            batch_keys = keys[batch_start : batch_start + batch_size]
-            originals = pipeline_read_vectors(client, batch_keys, datatype_changes)
-            backup.write_batch(batch_start // batch_size, batch_keys, originals)
-            if progress_callback:
-                progress_callback(
-                    "dump", worker_id, min(batch_start + batch_size, total)
-                )
-
-        backup.mark_dump_complete()
-
-        # Phase 2: Convert + write from backup
-        backup.start_quantize()
-        docs_quantized = 0
-
-        for batch_idx, (batch_keys, originals) in enumerate(backup.iter_batches()):
-            converted = convert_vectors(originals, datatype_changes)
-            if converted:
-                pipeline_write_vectors(client, converted)
-            backup.mark_batch_quantized(batch_idx)
-            docs_quantized += len(batch_keys)
-            if progress_callback:
-                progress_callback("quantize", worker_id, docs_quantized)
-
-        backup.mark_complete()
+
+        # Phase 1: Dump originals to backup shard (skip if already complete)
+        if backup.header.phase == "dump":
+            start_batch = backup.header.dump_completed_batches
+            for batch_start in range(start_batch * batch_size, total, batch_size):
+                batch_keys = keys[batch_start : batch_start + batch_size]
+                originals = pipeline_read_vectors(client, batch_keys, datatype_changes)
+                backup.write_batch(batch_start // batch_size, batch_keys, originals)
+                if progress_callback:
+                    progress_callback(
+                        "dump", worker_id, min(batch_start + batch_size, total)
+                    )
+            backup.mark_dump_complete()
+
+        # Phase 2: Convert + write from backup (skip completed batches)
+        if backup.header.phase in ("ready", "active"):
+            backup.start_quantize()
+            docs_quantized = 0
+
+            for batch_idx, (batch_keys, originals) in enumerate(backup.iter_batches()):
+                if batch_idx < backup.header.quantize_completed_batches:
+                    docs_quantized += len(batch_keys)
+                    continue
+                converted = convert_vectors(originals, datatype_changes)
+                if converted:
+                    pipeline_write_vectors(client, converted)
+                backup.mark_batch_quantized(batch_idx)
+                docs_quantized += len(batch_keys)
+                if progress_callback:
+                    progress_callback("quantize", worker_id, docs_quantized)
+
+            backup.mark_complete()
+        elif backup.header.phase == "completed":
+            # Already done from previous run
+            docs_quantized = total
+
         return {"worker_id": worker_id, "docs": docs_quantized}
     finally:
         try:
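
Both hunks rely on the same `VectorBackup` header contract: a shard is created in the `dump` phase, moves to `ready` once every original vector is on disk, is `active` while quantized batches are written back to Redis, and ends at `completed`, with `dump_completed_batches` and `quantize_completed_batches` persisted so a restarted worker knows where to pick up. A minimal sketch of that contract, assuming a JSON sidecar file for persistence; the real `VectorBackup` serialization may differ, and `load_header`/`save_header` are hypothetical helpers, not part of the diff:

import json
import os
from dataclasses import asdict, dataclass
from typing import Optional

@dataclass
class BackupHeader:
    # Phase state machine assumed by the workers:
    # "dump" -> "ready" -> "active" -> "completed"
    phase: str = "dump"
    dump_completed_batches: int = 0
    quantize_completed_batches: int = 0

def load_header(backup_path: str) -> Optional[BackupHeader]:
    """Return the persisted header, or None if no shard exists yet."""
    header_path = backup_path + ".header.json"
    if not os.path.exists(header_path):
        return None
    with open(header_path) as f:
        return BackupHeader(**json.load(f))

def save_header(backup_path: str, header: BackupHeader) -> None:
    """Rewrite the sidecar after each batch so a crash costs at most one batch."""
    with open(backup_path + ".header.json", "w") as f:
        json.dump(asdict(header), f)

Because the counters are persisted per batch, the resume cost after a crash is bounded by a single batch of redundant work in either phase.
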
@@ -309,62 +334,82 @@ async def _async_worker_quantize(
 
     client = aioredis.from_url(redis_url)
     try:
-        # Phase 1: Dump originals
-        backup = VectorBackup.create(
-            path=backup_path,
-            index_name=index_name,
-            fields=datatype_changes,
-            batch_size=batch_size,
-        )
+        # Try to resume from existing backup shard first
+        backup = VectorBackup.load(backup_path)
+        if backup is not None:
+            logger.info(
+                "Async worker %d: resuming from existing backup (phase=%s, "
+                "dump_batches=%d, quantize_batches=%d)",
+                worker_id,
+                backup.header.phase,
+                backup.header.dump_completed_batches,
+                backup.header.quantize_completed_batches,
+            )
+        else:
+            backup = VectorBackup.create(
+                path=backup_path,
+                index_name=index_name,
+                fields=datatype_changes,
+                batch_size=batch_size,
+            )
 
         total = len(keys)
         field_names = list(datatype_changes.keys())
 
-        for batch_start in range(0, total, batch_size):
-            batch_keys = keys[batch_start : batch_start + batch_size]
-            pipe = client.pipeline(transaction=False)
-            call_order: List[tuple] = []
-            for key in batch_keys:
-                for field_name in field_names:
-                    pipe.hget(key, field_name)
-                    call_order.append((key, field_name))
-            results = await pipe.execute()
-
-            originals: Dict[str, Dict[str, bytes]] = {}
-            for (key, field_name), value in zip(call_order, results):
-                if value is not None:
-                    if key not in originals:
-                        originals[key] = {}
-                    originals[key][field_name] = value
-
-            backup.write_batch(batch_start // batch_size, batch_keys, originals)
-            if progress_callback:
-                progress_callback(
-                    "dump", worker_id, min(batch_start + batch_size, total)
-                )
-
-        backup.mark_dump_complete()
-
-        # Phase 2: Convert + write from backup
-        backup.start_quantize()
-        docs_quantized = 0
-
-        for batch_idx, (batch_keys, batch_originals) in enumerate(
-            backup.iter_batches()
-        ):
-            converted = convert_vectors(batch_originals, datatype_changes)
-            if converted:
+        # Phase 1: Dump originals (skip if already complete)
+        if backup.header.phase == "dump":
+            start_batch = backup.header.dump_completed_batches
+            for batch_start in range(start_batch * batch_size, total, batch_size):
+                batch_keys = keys[batch_start : batch_start + batch_size]
                 pipe = client.pipeline(transaction=False)
-                for key, fields in converted.items():
-                    for field_name, data in fields.items():
-                        pipe.hset(key, field_name, data)
-                await pipe.execute()
-            backup.mark_batch_quantized(batch_idx)
-            docs_quantized += len(batch_keys)
-            if progress_callback:
-                progress_callback("quantize", worker_id, docs_quantized)
-
-        backup.mark_complete()
+                call_order: List[tuple] = []
+                for key in batch_keys:
+                    for field_name in field_names:
+                        pipe.hget(key, field_name)
+                        call_order.append((key, field_name))
+                results = await pipe.execute()
+
+                originals: Dict[str, Dict[str, bytes]] = {}
+                for (key, field_name), value in zip(call_order, results):
+                    if value is not None:
+                        if key not in originals:
+                            originals[key] = {}
+                        originals[key][field_name] = value
+
+                backup.write_batch(batch_start // batch_size, batch_keys, originals)
+                if progress_callback:
+                    progress_callback(
+                        "dump", worker_id, min(batch_start + batch_size, total)
+                    )
+            backup.mark_dump_complete()
+
+        # Phase 2: Convert + write from backup (skip completed batches)
+        if backup.header.phase in ("ready", "active"):
+            backup.start_quantize()
+            docs_quantized = 0
+
+            for batch_idx, (batch_keys, batch_originals) in enumerate(
+                backup.iter_batches()
+            ):
+                if batch_idx < backup.header.quantize_completed_batches:
+                    docs_quantized += len(batch_keys)
+                    continue
+                converted = convert_vectors(batch_originals, datatype_changes)
+                if converted:
+                    pipe = client.pipeline(transaction=False)
+                    for key, fields in converted.items():
+                        for field_name, data in fields.items():
+                            pipe.hset(key, field_name, data)
+                    await pipe.execute()
+                backup.mark_batch_quantized(batch_idx)
+                docs_quantized += len(batch_keys)
+                if progress_callback:
+                    progress_callback("quantize", worker_id, docs_quantized)
+
+            backup.mark_complete()
+        elif backup.header.phase == "completed":
+            docs_quantized = total
+
         return {"worker_id": worker_id, "docs": docs_quantized}
     finally:
         await client.aclose()
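
Taken together, the two code paths make a worker invocation idempotent and resumable: re-running it with the same `backup_path` reloads the shard, skips dump batches already on disk and quantize batches already written to Redis, and returns immediately with the document count once the shard reports `completed`. A sketch of that flow for the sync worker; every argument value below is illustrative, the keyword-argument call assumes the names used in the body are parameters of `_worker_quantize`, and the shape of `datatype_changes` (a dict keyed by vector field name) is inferred from how the diff iterates it:

# Hypothetical driver; only the resume behavior comes from the diff above.
args = dict(
    worker_id=0,
    redis_url="redis://localhost:6379",
    index_name="docs",
    keys=[f"docs:{i}" for i in range(10_000)],
    datatype_changes={"embedding": "float16"},  # field -> target datatype (assumed shape)
    batch_size=500,
    backup_path="/tmp/quantize-shard-0.bak",
    progress_callback=None,
)

# First run: may be killed at any point (OOM, node restart, ...); progress
# is checkpointed in the backup shard after every batch.
_worker_quantize(**args)

# Re-running with the same backup_path is safe: the worker loads the shard,
# resumes any unfinished phase, and once phase == "completed" it simply
# reports {"worker_id": 0, "docs": 10000} without touching Redis again.
result = _worker_quantize(**args)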