11use core:: cell:: RefCell ;
2+ use std:: ops:: { Deref , DerefMut } ;
23use traits:: Seq ;
34use wide:: u16x8;
45
@@ -35,6 +36,21 @@ impl RecycledBox {
3536 }
3637}
3738
39+ impl Deref for RecycledBox {
40+ type Target = SimdBuf ;
41+
42+ #[ inline( always) ]
43+ fn deref ( & self ) -> & Self :: Target {
44+ self . get ( )
45+ }
46+ }
47+ impl DerefMut for RecycledBox {
48+ #[ inline( always) ]
49+ fn deref_mut ( & mut self ) -> & mut SimdBuf {
50+ self . get_mut ( )
51+ }
52+ }
53+
3854impl Drop for RecycledBox {
3955 #[ inline( always) ]
4056 fn drop ( & mut self ) {
@@ -495,6 +511,72 @@ where
495511
496512 #[ inline( always) ]
497513 fn par_iter_bp ( self , context : usize ) -> PaddedIt < impl ChunkIt < S > > {
514+ // Boxed, so it doesn't consume precious registers.
515+ // Without this, cur is not always inlined into a register.
516+ let mut buf = IT_BUF . with_borrow_mut ( |v| RecycledBox ( v. pop ( ) ) ) ;
517+ buf. init_if_needed ( ) ;
518+ self . par_iter_bp_with_buf ( context, buf)
519+ }
520+
521+ #[ inline( always) ]
522+ fn par_iter_bp_delayed ( self , context : usize , delay : Delay ) -> PaddedIt < impl ChunkIt < ( S , S ) > > {
523+ self . par_iter_bp_delayed_with_factor ( context, delay, 1 )
524+ }
525+
526+ /// NOTE: When `self` starts does not start at a byte boundary, the
527+ /// 'delayed' character is not guaranteed to be `0`.
528+ #[ inline( always) ]
529+ fn par_iter_bp_delayed_2 (
530+ self ,
531+ context : usize ,
532+ delay1 : Delay ,
533+ delay2 : Delay ,
534+ ) -> PaddedIt < impl ChunkIt < ( S , S , S ) > > {
535+ self . par_iter_bp_delayed_2_with_factor ( context, delay1, delay2, 1 )
536+ }
537+
538+ /// Compares 29 characters at a time.
539+ fn cmp_lcp ( & self , other : & Self ) -> ( std:: cmp:: Ordering , usize ) {
540+ let mut lcp = 0 ;
541+ let min_len = self . len . min ( other. len ) ;
542+ for i in ( 0 ..min_len) . step_by ( Self :: K64 ) {
543+ let len = ( min_len - i) . min ( Self :: K64 ) ;
544+ let this = self . slice ( i..i + len) ;
545+ let other = other. slice ( i..i + len) ;
546+ let this_word = this. as_u64 ( ) ;
547+ let other_word = other. as_u64 ( ) ;
548+ if this_word != other_word {
549+ // Unfortunately, bases are packed in little endian order, so the default order is reversed.
550+ let eq = this_word ^ other_word;
551+ let t = eq. trailing_zeros ( ) as usize / B * B ;
552+ lcp += t / B ;
553+ let mask = ( Self :: CHAR_MASK ) << t;
554+ return ( ( this_word & mask) . cmp ( & ( other_word & mask) ) , lcp) ;
555+ }
556+ lcp += len;
557+ }
558+ ( self . len . cmp ( & other. len ) , lcp)
559+ }
560+
561+ #[ inline( always) ]
562+ fn get ( & self , index : usize ) -> u8 {
563+ let offset = self . offset + index;
564+ let idx = offset / Self :: C8 ;
565+ let offset = offset % Self :: C8 ;
566+ ( self . seq [ idx] >> ( B * offset) ) & Self :: CHAR_MASK as u8
567+ }
568+ }
569+
570+ impl < ' s , const B : usize > PackedSeqBase < ' s , B >
571+ where
572+ Bits < B > : SupportedBits ,
573+ {
574+ #[ inline( always) ]
575+ pub fn par_iter_bp_with_buf < BUF : DerefMut < Target = [ S ; 8 ] > > (
576+ self ,
577+ context : usize ,
578+ mut buf : BUF ,
579+ ) -> PaddedIt < impl ChunkIt < S > + use < ' s , B , BUF > > {
498580 #[ cfg( target_endian = "big" ) ]
499581 panic ! ( "Big endian architectures are not supported." ) ;
500582
@@ -516,11 +598,6 @@ where
516598 let offsets: [ usize ; 8 ] = from_fn ( |l| l * bytes_per_chunk) ;
517599 let mut cur = S :: ZERO ;
518600
519- // Boxed, so it doesn't consume precious registers.
520- // Without this, cur is not always inlined into a register.
521- let mut buf = IT_BUF . with_borrow_mut ( |v| RecycledBox ( v. pop ( ) ) ) ;
522- buf. init_if_needed ( ) ;
523-
524601 let simd_char_mask: u32x8 = S :: splat ( Self :: CHAR_MASK as u32 ) ;
525602 let simd_b: u32x8 = S :: splat ( B as u32 ) ;
526603
@@ -550,9 +627,9 @@ where
550627 )
551628 } ,
552629 ) ;
553- * buf. get_mut ( ) = transpose ( data) ;
630+ * buf = transpose ( data) ;
554631 }
555- cur = buf. get ( ) [ ( i % Self :: C256 ) / Self :: C32 ] ;
632+ cur = buf[ ( i % Self :: C256 ) / Self :: C32 ] ;
556633 }
557634 // Extract the last 2 bits of each character.
558635 let chars = cur & simd_char_mask;
@@ -566,59 +643,6 @@ where
566643 PaddedIt { it, padding }
567644 }
568645
569- #[ inline( always) ]
570- fn par_iter_bp_delayed ( self , context : usize , delay : Delay ) -> PaddedIt < impl ChunkIt < ( S , S ) > > {
571- self . par_iter_bp_delayed_with_factor ( context, delay, 1 )
572- }
573-
574- /// NOTE: When `self` starts does not start at a byte boundary, the
575- /// 'delayed' character is not guaranteed to be `0`.
576- #[ inline( always) ]
577- fn par_iter_bp_delayed_2 (
578- self ,
579- context : usize ,
580- delay1 : Delay ,
581- delay2 : Delay ,
582- ) -> PaddedIt < impl ChunkIt < ( S , S , S ) > > {
583- self . par_iter_bp_delayed_2_with_factor ( context, delay1, delay2, 1 )
584- }
585-
586- /// Compares 29 characters at a time.
587- fn cmp_lcp ( & self , other : & Self ) -> ( std:: cmp:: Ordering , usize ) {
588- let mut lcp = 0 ;
589- let min_len = self . len . min ( other. len ) ;
590- for i in ( 0 ..min_len) . step_by ( Self :: K64 ) {
591- let len = ( min_len - i) . min ( Self :: K64 ) ;
592- let this = self . slice ( i..i + len) ;
593- let other = other. slice ( i..i + len) ;
594- let this_word = this. as_u64 ( ) ;
595- let other_word = other. as_u64 ( ) ;
596- if this_word != other_word {
597- // Unfortunately, bases are packed in little endian order, so the default order is reversed.
598- let eq = this_word ^ other_word;
599- let t = eq. trailing_zeros ( ) as usize / B * B ;
600- lcp += t / B ;
601- let mask = ( Self :: CHAR_MASK ) << t;
602- return ( ( this_word & mask) . cmp ( & ( other_word & mask) ) , lcp) ;
603- }
604- lcp += len;
605- }
606- ( self . len . cmp ( & other. len ) , lcp)
607- }
608-
609- #[ inline( always) ]
610- fn get ( & self , index : usize ) -> u8 {
611- let offset = self . offset + index;
612- let idx = offset / Self :: C8 ;
613- let offset = offset % Self :: C8 ;
614- ( self . seq [ idx] >> ( B * offset) ) & Self :: CHAR_MASK as u8
615- }
616- }
617-
618- impl < ' s , const B : usize > PackedSeqBase < ' s , B >
619- where
620- Bits < B > : SupportedBits ,
621- {
622646 #[ inline( always) ]
623647 pub fn par_iter_bp_delayed_with_factor (
624648 self ,
0 commit comments