Skip to content

Commit 5010d47

Browse files
committed
fix: address 7 review issues — AEAD test, stream cross-test, child_level KDF, parallel decrypt, DataMap parsing, bounds check, test rename
- Add AEAD tamper detection integration test (Issue 1) - Fix stream encrypt/decrypt cross-implementation test (Issue 2) - Pass child_level through KDF for domain separation in shrink_data_map (Issue 3) - Parallelize decrypt_sorted_set with rayon par_iter (Issue 4) - Simplify DataMap::from_bytes to delegate to bincode (Issue 5) - Add bounds check in get_n_1_n_2 for out-of-range chunk_index (Issue 6) - Rename test_key_derivation_sizes to test_key_derivation_sizes_and_roundtrip (Issue 7)
1 parent 41a17ff commit 5010d47

5 files changed

Lines changed: 94 additions & 68 deletions

File tree

src/data_map.rs

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,27 +33,10 @@ impl DataMap {
3333
bincode::serialize(self)
3434
}
3535

36-
/// Deserialize DataMap from bytes (v1 versioned format)
36+
/// Deserialize DataMap from bytes (v1 versioned format).
37+
/// Delegates to the `Deserialize` impl which handles version checking.
3738
pub fn from_bytes(bytes: &[u8]) -> Result<Self, bincode::Error> {
38-
#[derive(Deserialize)]
39-
struct VersionedDataMap {
40-
version: u8,
41-
chunk_identifiers: Vec<ChunkInfo>,
42-
child: Option<usize>,
43-
}
44-
45-
let versioned = bincode::deserialize::<VersionedDataMap>(bytes)?;
46-
if versioned.version == 1 {
47-
Ok(DataMap {
48-
chunk_identifiers: versioned.chunk_identifiers,
49-
child: versioned.child,
50-
})
51-
} else {
52-
Err(Box::new(bincode::ErrorKind::Custom(format!(
53-
"unsupported DataMap version: {ver}",
54-
ver = versioned.version
55-
))))
56-
}
39+
bincode::deserialize(bytes)
5740
}
5841
}
5942

src/decrypt.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
use crate::{cipher, utils::get_pad_key_and_nonce, utils::xor, EncryptedChunk, Error, Result};
1010
use bytes::Bytes;
11+
use rayon::prelude::*;
1112
use std::io::Cursor;
1213
use xor_name::XorName;
1314

@@ -17,12 +18,19 @@ pub fn decrypt_sorted_set(
1718
encrypted_chunks: &[&EncryptedChunk],
1819
child_level: usize,
1920
) -> Result<Bytes> {
20-
let mut all_bytes = Vec::new();
21-
22-
// Process chunks sequentially to maintain proper boundaries
23-
for (chunk_index, chunk) in encrypted_chunks.iter().enumerate() {
24-
let decrypted = decrypt_chunk(chunk_index, &chunk.content, &src_hashes, child_level)?;
25-
all_bytes.extend_from_slice(&decrypted);
21+
// Decrypt chunks in parallel, then concatenate in order
22+
let decrypted_chunks: Vec<Bytes> = encrypted_chunks
23+
.par_iter()
24+
.enumerate()
25+
.map(|(chunk_index, chunk)| {
26+
decrypt_chunk(chunk_index, &chunk.content, &src_hashes, child_level)
27+
})
28+
.collect::<Result<Vec<_>>>()?;
29+
30+
let total_len = decrypted_chunks.iter().map(|c| c.len()).sum();
31+
let mut all_bytes = Vec::with_capacity(total_len);
32+
for chunk in &decrypted_chunks {
33+
all_bytes.extend_from_slice(chunk);
2634
}
2735

2836
Ok(Bytes::from(all_bytes))

src/lib.rs

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,14 @@ pub const COMPRESSION_QUALITY: i32 = 6;
153153
/// Encrypts a set of bytes and returns the encrypted data together with
154154
/// the data map that is derived from the input data.
155155
pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
156+
encrypt_with_child_level(bytes, 0)
157+
}
158+
159+
/// Internal encryption that accepts a child_level for KDF domain separation.
160+
fn encrypt_with_child_level(
161+
bytes: Bytes,
162+
child_level: usize,
163+
) -> Result<(DataMap, Vec<EncryptedChunk>)> {
156164
let file_size = bytes.len();
157165
if file_size < MIN_ENCRYPTABLE_BYTES {
158166
return Err(Error::Generic(format!(
@@ -186,7 +194,7 @@ pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
186194
}
187195

188196
// For chunks 2 onwards, we can encrypt immediately since we have the previous two hashes
189-
let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, 0)?;
197+
let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, child_level)?;
190198
let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
191199
let dst_hash = hash::content_hash(&encrypted_content);
192200

@@ -204,7 +212,7 @@ pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
204212

205213
// Now process the first two chunks using the complete set of source hashes
206214
for (chunk_index, chunk_data, src_hash, src_size) in first_chunks {
207-
let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, 0)?;
215+
let pki = get_pad_key_and_nonce(chunk_index, &src_hashes, child_level)?;
208216
let encrypted_content = encrypt::encrypt_chunk(chunk_data, pki)?;
209217
let dst_hash = hash::content_hash(&encrypted_content);
210218

@@ -252,8 +260,7 @@ pub fn encrypt(bytes: Bytes) -> Result<(DataMap, Vec<EncryptedChunk>)> {
252260
/// * `Result<Bytes>` - The decrypted data or an error if chunks are missing/corrupted
253261
pub(crate) fn decrypt_full_set(data_map: &DataMap, chunks: &[EncryptedChunk]) -> Result<Bytes> {
254262
let src_hashes = extract_hashes(data_map);
255-
// encrypt() always uses child_level=0 for key derivation, so decrypt must match
256-
let child_level = 0;
263+
let child_level = data_map.child().unwrap_or(0);
257264

258265
// Create a mapping of chunk hashes to chunks for efficient lookup
259266
let chunk_map: HashMap<XorName, &EncryptedChunk> = chunks
@@ -327,8 +334,12 @@ pub(crate) fn decrypt_range(
327334
let mut all_bytes = Vec::new();
328335
for (idx, chunk) in sorted_chunks.iter().enumerate() {
329336
let chunk_idx = start_chunk + idx;
330-
// encrypt() always uses child_level=0 for key derivation, so decrypt must match
331-
let decrypted = decrypt_chunk(chunk_idx, &chunk.content, &src_hashes, 0)?;
337+
let decrypted = decrypt_chunk(
338+
chunk_idx,
339+
&chunk.content,
340+
&src_hashes,
341+
data_map.child().unwrap_or(0),
342+
)?;
332343
all_bytes.extend_from_slice(&decrypted);
333344
}
334345

@@ -362,22 +373,23 @@ where
362373
let mut all_chunks = Vec::new();
363374

364375
while data_map.len() > 3 {
365-
let child_level = data_map.child().unwrap_or(0);
376+
let next_child_level = data_map.child().map_or(1, |c| c + 1);
366377
let bytes = data_map
367378
.to_bytes()
368379
.map(Bytes::from)
369380
.map_err(|e| Error::Generic(format!("Failed to serialize data map: {e}")))?;
370381

371-
let (mut new_data_map, encrypted_chunks) = encrypt(bytes)?;
382+
let (mut new_data_map, encrypted_chunks) =
383+
encrypt_with_child_level(bytes, next_child_level)?;
372384

373385
// Store and collect chunks
374386
for chunk in &encrypted_chunks {
375387
store_chunk(hash::content_hash(&chunk.content), chunk.content.clone())?;
376388
}
377389
all_chunks.extend(encrypted_chunks);
378390

379-
// Update data map for next iteration
380-
new_data_map = DataMap::with_child(new_data_map.infos().to_vec(), child_level + 1);
391+
// Tag the DataMap with the child_level used during encryption
392+
new_data_map = DataMap::with_child(new_data_map.infos().to_vec(), next_child_level);
381393
data_map = new_data_map;
382394
}
383395
Ok((data_map, all_chunks))

src/utils.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ pub(crate) fn get_n_1_n_2(
5151
"total_num_chunks must be at least 3, got {total_num_chunks}"
5252
)));
5353
}
54+
if chunk_index >= total_num_chunks {
55+
return Err(crate::Error::Generic(format!(
56+
"chunk_index {chunk_index} out of bounds for total_num_chunks {total_num_chunks}"
57+
)));
58+
}
5459
match chunk_index {
5560
0 => Ok((total_num_chunks - 1, total_num_chunks - 2)),
5661
1 => Ok((0, total_num_chunks - 1)),
@@ -337,6 +342,16 @@ mod tests {
337342
);
338343
}
339344

345+
#[test]
346+
fn test_get_n_1_n_2_out_of_bounds() {
347+
let result = get_n_1_n_2(5, 3);
348+
assert!(result.is_err(), "chunk_index >= total should fail");
349+
let result = get_n_1_n_2(3, 3);
350+
assert!(result.is_err(), "chunk_index == total should fail");
351+
let result = get_n_1_n_2(2, 3);
352+
assert!(result.is_ok(), "chunk_index < total should succeed");
353+
}
354+
340355
#[test]
341356
fn test_kdf_output_sizes() {
342357
let src = XorName([0x01; 32]);

tests/integration_tests.rs

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -967,26 +967,14 @@ fn test_stream_encrypt_file_decrypt_storage_cross() -> Result<()> {
967967

968968
#[test]
969969
fn test_file_encrypt_stream_decrypt_cross() -> Result<()> {
970+
// Use standard encrypt() then streaming decrypt — cross-compatibility test
970971
let data_size = 150_000;
971972
let original_data = random_bytes(data_size);
972973

973-
let mut stream = stream_encrypt(
974-
data_size,
975-
original_data.chunks(8192).map(|c| Bytes::from(c.to_vec())),
976-
)?;
977-
978-
let mut storage = HashMap::new();
979-
for chunk_result in stream.chunks() {
980-
let (hash, content) = chunk_result?;
981-
let _ = storage.insert(hash, content.to_vec());
982-
}
983-
984-
let data_map = stream
985-
.datamap()
986-
.ok_or_else(|| Error::Generic("No DataMap".to_string()))?
987-
.clone();
988-
974+
let (data_map, encrypted_chunks) = encrypt(original_data.clone())?;
975+
let storage = build_chunk_storage(&encrypted_chunks);
989976
let fetcher = make_parallel_fetcher(&storage);
977+
990978
let decrypt_stream = streaming_decrypt(&data_map, fetcher)?;
991979
let decrypted = decrypt_stream.range_full()?;
992980

@@ -1297,6 +1285,40 @@ fn test_aead_tag_protects_integrity() -> Result<()> {
12971285
Ok(())
12981286
}
12991287

1288+
// --- AEAD pipeline integrity test ---
1289+
1290+
#[test]
1291+
fn test_aead_rejects_tampered_ciphertext_through_pipeline() -> Result<()> {
1292+
use self_encryption::decrypt_chunk;
1293+
1294+
let original_data = random_bytes(10_000);
1295+
let (data_map, encrypted_chunks) = encrypt(original_data)?;
1296+
1297+
// Extract the src_hashes from the data map (the valid ones used during encryption)
1298+
let src_hashes: Vec<XorName> = data_map.infos().iter().map(|info| info.src_hash).collect();
1299+
1300+
// Take the first chunk's encrypted content and tamper with it
1301+
let first_chunk = encrypted_chunks
1302+
.first()
1303+
.ok_or_else(|| Error::Generic("no chunks".to_string()))?;
1304+
let mut tampered_content = first_chunk.content.to_vec();
1305+
let mid = tampered_content.len() / 2;
1306+
if let Some(byte) = tampered_content.get_mut(mid) {
1307+
*byte ^= 0xFF;
1308+
}
1309+
let tampered_bytes = Bytes::from(tampered_content);
1310+
1311+
// Call decrypt_chunk directly with valid src_hashes but tampered ciphertext
1312+
// This proves the AEAD layer (not hash lookup) is the integrity gate
1313+
let result = decrypt_chunk(0, &tampered_bytes, &src_hashes, 0);
1314+
assert!(
1315+
result.is_err(),
1316+
"decrypt_chunk should fail on tampered ciphertext due to AEAD tag verification"
1317+
);
1318+
1319+
Ok(())
1320+
}
1321+
13001322
// --- Task 17: Verify BLAKE3 is used for hashing ---
13011323

13021324
#[test]
@@ -1327,19 +1349,10 @@ fn test_encrypted_chunk_dst_hash_is_blake3() -> Result<()> {
13271349
Ok(())
13281350
}
13291351

1330-
// --- Task 18: Verify key derivation sizes ---
1352+
// --- Task 18: Verify key derivation sizes and roundtrip ---
13311353

13321354
#[test]
1333-
fn test_key_derivation_sizes() -> Result<()> {
1334-
// Key derivation internals are tested indirectly through encrypt/decrypt roundtrips,
1335-
// but we can verify the constant sizes are correct for ChaCha20-Poly1305.
1336-
use self_encryption::hash::content_hash;
1337-
1338-
let h1 = content_hash(b"hash1");
1339-
let h2 = content_hash(b"hash2");
1340-
let h3 = content_hash(b"hash3");
1341-
let hashes = vec![h1, h2, h3];
1342-
1355+
fn test_key_derivation_sizes_and_roundtrip() -> Result<()> {
13431356
// XorName is 32 bytes
13441357
assert_eq!(std::mem::size_of::<XorName>(), 32);
13451358

@@ -1356,10 +1369,5 @@ fn test_key_derivation_sizes() -> Result<()> {
13561369
assert_ne!(info.dst_hash, XorName::default());
13571370
}
13581371

1359-
// Verify hashes are 32 bytes (BLAKE3 output = XorName size)
1360-
for hash in &hashes {
1361-
assert_eq!(hash.0.len(), 32);
1362-
}
1363-
13641372
Ok(())
13651373
}

0 commit comments

Comments
 (0)