Skip to content

Commit 2d236ed

Browse files
Add unpack_kmer_to_vec(u64, k) -> Vec and variants
1 parent 2f902a4 commit 2d236ed

2 files changed

Lines changed: 43 additions & 1 deletion

File tree

src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,9 @@ pub use ascii_seq::{AsciiSeq, AsciiSeqVec};
142142
pub use packed_n_seq::{PackedNSeq, PackedNSeqVec};
143143
pub use packed_seq::{BitSeq, BitSeqVec, PackedSeq, PackedSeqVec};
144144
pub use packed_seq::{
145-
complement_base, complement_base_simd, complement_char, pack_char, unpack_base,
145+
complement_base, complement_base_simd, complement_char, pack_char, unpack_base, unpack_kmer,
146+
unpack_kmer_into_vec, unpack_kmer_to_vec, unpack_kmer_u128, unpack_kmer_u128_into_vec,
147+
unpack_kmer_u128_to_vec,
146148
};
147149
pub use padded_it::{Advance, ChunkIt, PaddedIt};
148150
pub use traits::{Delay, Seq, SeqVec};

src/packed_seq.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,46 @@ pub fn unpack_base(base: u8) -> u8 {
224224
b"ACTG"[base as usize]
225225
}
226226

227+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` characters.
228+
/// Slice the returned array to the correct length.
229+
#[inline(always)]
230+
pub fn unpack_kmer(kmer: u64) -> [u8; 32] {
231+
std::array::from_fn(|i| unpack_base(((kmer >> (2 * i)) & 3) as u8))
232+
}
233+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` character.
234+
#[inline(always)]
235+
pub fn unpack_kmer_into_vec(kmer: u64, k: usize, out: &mut Vec<u8>) {
236+
out.clear();
237+
out.extend((0..k).map(|i| unpack_base(((kmer >> (2 * i)) & 3) as u8)));
238+
}
239+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` character.
240+
#[inline(always)]
241+
pub fn unpack_kmer_to_vec(kmer: u64, k: usize) -> Vec<u8> {
242+
let mut out = vec![];
243+
unpack_kmer_into_vec(kmer, k, &mut out);
244+
out
245+
}
246+
247+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` characters.
248+
/// Slice the returned array to the correct length.
249+
#[inline(always)]
250+
pub fn unpack_kmer_u128(kmer: u128) -> [u8; 64] {
251+
std::array::from_fn(|i| unpack_base(((kmer >> (2 * i)) & 3) as u8))
252+
}
253+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` character.
254+
#[inline(always)]
255+
pub fn unpack_kmer_u128_into_vec(kmer: u128, k: usize, out: &mut Vec<u8>) {
256+
out.clear();
257+
out.extend((0..k).map(|i| unpack_base(((kmer >> (2 * i)) & 3) as u8)));
258+
}
259+
/// Unpack a 2-bit encoded kmer into corresponding `ACTG` character.
260+
#[inline(always)]
261+
pub fn unpack_kmer_u128_to_vec(kmer: u128, k: usize) -> Vec<u8> {
262+
let mut out = vec![];
263+
unpack_kmer_u128_into_vec(kmer, k, &mut out);
264+
out
265+
}
266+
227267
/// Complement an ASCII character: `A<>T` and `C<>G`.
228268
#[inline(always)]
229269
pub const fn complement_char(base: u8) -> u8 {

0 commit comments

Comments
 (0)