diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 79a6cad..dced2c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,7 @@ jobs: - fuzz_json_de - fuzz_cbor_decode - fuzz_cbor_roundtrip + - fuzz_object_ops steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 74da9c7..823dde0 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -41,3 +41,10 @@ path = "fuzz_targets/fuzz_cbor_roundtrip.rs" test = false doc = false bench = false + +[[bin]] +name = "fuzz_object_ops" +path = "fuzz_targets/fuzz_object_ops.rs" +test = false +doc = false +bench = false diff --git a/fuzz/fuzz_targets/fuzz_object_ops.rs b/fuzz/fuzz_targets/fuzz_object_ops.rs new file mode 100644 index 0000000..03bf12b --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_object_ops.rs @@ -0,0 +1,52 @@ +#![no_main] + +use arbitrary::Arbitrary; +use ijson::{IObject, IValue}; +use libfuzzer_sys::fuzz_target; +use std::collections::HashMap; + +// Small key space (u8 -> "k{n}") so op sequences repeatedly hit the same keys, +// reliably crossing the small-object table threshold in both directions and +// forcing hash collisions / Robin-Hood displacement on the u32 table. +#[derive(Arbitrary, Debug)] +enum Op { + Insert(u8, u64), + Remove(u8), + Get(u8), +} + +fuzz_target!(|ops: Vec| { + let mut obj = IObject::new(); + let mut oracle: HashMap = HashMap::new(); + + for op in ops { + match op { + Op::Insert(id, v) => { + let k = format!("k{id}"); + // insert() only errors on OOM / 2^30 overflow; neither is reachable here. + let prev = obj + .insert(k.as_str(), IValue::from(v)) + .unwrap() + .and_then(|old| old.to_u64()); + assert_eq!(prev, oracle.insert(id, v)); + } + Op::Remove(id) => { + let k = format!("k{id}"); + let removed = obj.remove(k.as_str()).and_then(|v| v.to_u64()); + assert_eq!(removed, oracle.remove(&id)); + } + Op::Get(id) => { + let k = format!("k{id}"); + let got = obj.get(k.as_str()).and_then(|v| v.to_u64()); + assert_eq!(got, oracle.get(&id).copied()); + } + } + assert_eq!(obj.len(), oracle.len()); + } + + // Final agreement: every oracle key present with the right value. + for (id, v) in &oracle { + let k = format!("k{id}"); + assert_eq!(obj.get(k.as_str()).and_then(|x| x.to_u64()), Some(*v)); + } +}); diff --git a/src/array.rs b/src/array.rs index 864f85e..44022ad 100644 --- a/src/array.rs +++ b/src/array.rs @@ -1529,16 +1529,7 @@ pub(super) mod private { impl Sealed for bool {} } -/// Trait for types that can be fallibly extended from an iterator -/// This is similar to the standard `Extend` trait, but allows for allocation failures -/// and is specialized for `IArray`. -pub trait TryExtend { - /// Attempts to extend `self` by appending items from the given iterator. - /// Returns an `AllocError` if allocation fails. - /// # Errors - /// Returns an `AllocError` if memory allocation fails during the extension. - fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError>; -} +pub use crate::convert::{TryCollect, TryExtend, TryFromIterator}; impl + private::Sealed> TryExtend for IArray { fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError> { @@ -1668,19 +1659,6 @@ fn convert_bf16>(value: T) -> bf16 { extend_impl_int!(i8, i16, i32, i64, isize, u8, u16, u32, u64, usize); extend_impl_float!(f16, bf16, f32, f64); -/// Trait for types that can be fallibly constructed from an iterator -/// This is similar to the standard `FromIterator` trait, but allows for allocation failures -/// and is specialized for `IArray`. -pub trait TryFromIterator { - /// Attempts to create an instance of `Self` from an iterator of items of type `T`. - /// Returns an `AllocError` if allocation fails. - /// # Errors - /// Returns `AllocError` if memory allocation fails during the construction. - fn try_from_iter>(iter: U) -> Result - where - Self: Sized; -} - impl + private::Sealed> TryFromIterator for IArray { fn try_from_iter>(iter: T) -> Result { let mut res = IArray::new(); @@ -1704,27 +1682,6 @@ macro_rules! from_iter_impl { from_iter_impl!(i8, i16, i32, i64, u8, u16, u32, u64, f16, bf16, f32, f64); -/// Extension trait for iterators to collect into an IArray with fallible allocation. -/// This is similar to the standard `collect` method, but allows for allocation failures. -pub trait TryCollect: Iterator + Sized { - /// Attempts to collect the iterator into a collection `B`. - /// Returns an `AllocError` if allocation fails. - /// # Errors - /// Returns `AllocError` if memory allocation fails during the collection. - fn try_collect(self) -> Result - where - B: TryFromIterator; -} - -impl> TryCollect for I { - fn try_collect(self) -> Result - where - B: TryFromIterator, - { - B::try_from_iter(self) - } -} - impl + private::Sealed> TryFrom> for IArray { type Error = IJsonError; fn try_from(other: Vec) -> Result { diff --git a/src/cbor.rs b/src/cbor.rs index b67fda0..30b701c 100644 --- a/src/cbor.rs +++ b/src/cbor.rs @@ -215,14 +215,16 @@ fn cbor_to_ivalue(val: Value, depth: u32) -> Result { Ok(out.into()) } Value::Map(entries) => { - let mut obj = IObject::with_capacity(entries.len()); + let mut obj = + IObject::with_capacity(entries.len()).map_err(|_| CborDecodeError::AllocError)?; for (k, v) in entries { let key = match k { Value::Text(s) => s, _ => return Err(CborDecodeError::InvalidValue), }; let val = cbor_to_ivalue(v, depth + 1)?; - obj.insert(&key, val); + obj.insert(&key, val) + .map_err(|_| CborDecodeError::AllocError)?; } Ok(obj.into()) } diff --git a/src/convert.rs b/src/convert.rs new file mode 100644 index 0000000..0c25d50 --- /dev/null +++ b/src/convert.rs @@ -0,0 +1,48 @@ +//! Fallible iterator / collection-conversion traits shared by [`IArray`] and +//! [`IObject`]. These mirror the standard `Extend`, `FromIterator`, and +//! `Iterator::collect`, but return a `Result`. +//! +//! [`IArray`]: crate::IArray +//! [`IObject`]: crate::IObject + +use crate::error::IJsonError; + +/// Trait for types that can be fallibly extended from an iterator. +/// This is similar to the standard `Extend` trait, but allows for allocation failures. +pub trait TryExtend { + /// Attempts to extend `self` by appending items from the given iterator. + /// # Errors + /// Returns an `AllocError` if memory allocation fails during the extension. + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError>; +} + +/// Trait for types that can be fallibly constructed from an iterator. +/// This is similar to the standard `FromIterator` trait, but allows for allocation failures. +pub trait TryFromIterator { + /// Attempts to create an instance of `Self` from an iterator of items of type `T`. + /// # Errors + /// Returns `AllocError` if memory allocation fails during the construction. + fn try_from_iter>(iter: U) -> Result + where + Self: Sized; +} + +/// Extension trait for iterators to collect into a fallible collection. +/// This is similar to the standard `collect` method, but allows for allocation failures. +pub trait TryCollect: Iterator + Sized { + /// Attempts to collect the iterator into a collection `B`. + /// # Errors + /// Returns `AllocError` if memory allocation fails during the collection. + fn try_collect(self) -> Result + where + B: TryFromIterator; +} + +impl> TryCollect for I { + fn try_collect(self) -> Result + where + B: TryFromIterator, + { + B::try_from_iter(self) + } +} diff --git a/src/de.rs b/src/de.rs index e1c9068..41d35ff 100644 --- a/src/de.rs +++ b/src/de.rs @@ -292,10 +292,12 @@ impl<'de> Visitor<'de> for ObjectVisitor { where V: MapAccess<'de>, { - let mut obj = IObject::with_capacity(visitor.size_hint().unwrap_or(0)); + let mut obj = IObject::with_capacity(visitor.size_hint().unwrap_or(0)) + .map_err(|_| SError::custom("Failed to allocate object"))?; while let Some(k) = visitor.next_key::()? { let v = visitor.next_value_seed(IValueDeserSeed::new(self.fpha_config))?; - obj.insert(k, v); + obj.insert(k, v) + .map_err(|e| SError::custom(e.to_string()))?; } Ok(obj) } diff --git a/src/lib.rs b/src/lib.rs index ecebc69..b7f2335 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,11 +36,13 @@ pub mod unsafe_string; #[cfg(not(feature = "thread_safe"))] pub use unsafe_string::IString; +pub mod convert; pub mod error; mod thin; mod value; pub use array::{FloatType, IArray}; +pub use convert::{TryCollect, TryExtend, TryFromIterator}; pub use number::INumber; pub use object::IObject; use std::alloc::Layout; diff --git a/src/macros.rs b/src/macros.rs index efeb375..fc00a36 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -166,7 +166,7 @@ macro_rules! ijson_internal { // Insert the current entry followed by trailing comma. (@object $object:ident [$($key:tt)+] ($value:expr) , $($rest:tt)*) => { - let _ = $object.insert(($($key)+), $value); + $object.insert(($($key)+), $value).unwrap(); ijson_internal!(@object $object () ($($rest)*) ($($rest)*)); }; @@ -177,7 +177,7 @@ macro_rules! ijson_internal { // Insert the last entry without trailing comma. (@object $object:ident [$($key:tt)+] ($value:expr)) => { - let _ = $object.insert(($($key)+), $value); + $object.insert(($($key)+), $value).unwrap(); }; // Next value is `null`. diff --git a/src/object.rs b/src/object.rs index fa09298..76ad51e 100644 --- a/src/object.rs +++ b/src/object.rs @@ -6,10 +6,11 @@ use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeMap, HashMap}; use std::fmt::{self, Debug, Formatter}; use std::hash::{Hash, Hasher}; -use std::iter::FromIterator; use std::mem; use std::ops::{Index, IndexMut}; +use crate::convert::{TryExtend, TryFromIterator}; +use crate::error::{AllocError, IJsonError}; use crate::thin::{ThinMut, ThinMutExt, ThinRef, ThinRefExt}; use crate::{Defrag, DefragAllocator}; @@ -23,8 +24,43 @@ use super::value::{IValue, TypeTag}; #[repr(C)] #[repr(align(8))] struct Header { - len: usize, - cap: usize, + /// Packed field: + /// bits 0-31: length, + /// bits 32-63: capacity + packed: u64, +} + +impl Header { + const LEN_MASK: u64 = (1u64 << 32) - 1; + const LEN_SHIFT: u64 = 0; + const CAP_MASK: u64 = (1u64 << 32) - 1; + const CAP_SHIFT: u64 = 32; + + const fn new(len: usize, cap: usize) -> Result { + if len > Self::LEN_MASK as usize || cap > Self::CAP_MASK as usize { + return Err(IJsonError::Alloc(AllocError)); + } + let packed = ((len as u64) & Self::LEN_MASK) << Self::LEN_SHIFT + | ((cap as u64) & Self::CAP_MASK) << Self::CAP_SHIFT; + Ok(Self { packed }) + } + + fn len(&self) -> usize { + ((self.packed >> Self::LEN_SHIFT) & Self::LEN_MASK) as usize + } + + fn cap(&self) -> usize { + ((self.packed >> Self::CAP_SHIFT) & Self::CAP_MASK) as usize + } + + fn set_len(&mut self, len: usize) { + assert!( + len <= Self::LEN_MASK as usize, + "Length exceeds 32-bit limit" + ); + self.packed = (self.packed & !(Self::LEN_MASK << Self::LEN_SHIFT)) + | (((len as u64) & Self::LEN_MASK) << Self::LEN_SHIFT); + } } #[repr(C)] @@ -34,6 +70,14 @@ struct KeyValuePair { value: IValue, } +/// Objects with capacity at or below this threshold store no hash table. +const SMALL_OBJECT_THRESHOLD: usize = 8; + +/// Whether an object of this capacity carries a hash table. +fn has_table(cap: usize) -> bool { + cap > SMALL_OBJECT_THRESHOLD +} + fn hash_capacity(cap: usize) -> usize { cap + cap / 4 } @@ -51,11 +95,22 @@ fn hash_bucket(s: &IString, hash_cap: usize) -> usize { struct SplitHeader<'a> { cap: usize, items: &'a [KeyValuePair], - table: &'a [usize], + table: &'a [u32], } impl<'a> SplitHeader<'a> { + // Returns the *item index* (Ok) in no-table mode, or the *table bucket* (Ok) in table mode. fn find_bucket(&self, key: &IString) -> Result { + if !has_table(self.cap) { + // Small object: linear scan the items array. + for (i, kvp) in self.items.iter().enumerate() { + if &kvp.key == key { + return Ok(i); + } + } + // No table bucket to report; insertion just appends. + return Err(usize::MAX); + } let hash_cap = hash_capacity(self.cap); let initial_bucket = hash_bucket(key, hash_cap); unsafe { @@ -65,12 +120,12 @@ impl<'a> SplitHeader<'a> { let index = *self.table.get_unchecked(bucket); // If we hit an empty bucket, we know the key is not present - if index == usize::MAX { + if index == u32::MAX { return Err(bucket); } // If the bucket contains our key, we found the bucket - let k = &self.items.get_unchecked(index).key; + let k = &self.items.get_unchecked(index as usize).key; if k == key { return Ok(bucket); } @@ -87,13 +142,17 @@ impl<'a> SplitHeader<'a> { } // Safety: index must be in bounds unsafe fn find_bucket_from_index(&self, index: usize) -> usize { + // No-table mode: the "bucket" identifying an item is the item index itself. + if !has_table(self.cap) { + return index; + } let hash_cap = hash_capacity(self.cap); let key = &self.items.get_unchecked(index).key; let mut bucket = hash_bucket(key, hash_cap); // We don't bother with any early exit conditions, because // we know the item is present. - while *self.table.get_unchecked(bucket) != index { + while *self.table.get_unchecked(bucket) != index as u32 { bucket = (bucket + 1) % hash_cap; } @@ -104,7 +163,7 @@ impl<'a> SplitHeader<'a> { struct SplitHeaderMut<'a> { cap: usize, items: &'a mut [KeyValuePair], - table: &'a mut [usize], + table: &'a mut [u32], } impl<'a> SplitHeaderMut<'a> { @@ -126,12 +185,12 @@ impl<'a> SplitHeaderMut<'a> { let index = *self.table.get_unchecked(bucket); // If we hit an empty bucket, we're done - if index == usize::MAX { + if index == u32::MAX { return; } // If the probe length is zero, we're done - let k = &self.items.get_unchecked(index).key; + let k = &self.items.get_unchecked(index as usize).key; if hash_bucket(k, hash_cap) == bucket { return; } @@ -146,11 +205,16 @@ impl<'a> SplitHeaderMut<'a> { // // Inserts an index into the table, shifting existing elements down until // there's an empty slot. - unsafe fn shift(&mut self, initial_bucket: usize, mut index: usize) { + unsafe fn shift(&mut self, initial_bucket: usize, index: usize) { + // No-table mode keeps no hash table to insert into. + if !has_table(self.cap) { + return; + } let hash_cap = hash_capacity(self.cap); + let mut index = index as u32; for i in 0..hash_cap { // If we hit an empty bucket, we're done - if index == usize::MAX { + if index == u32::MAX { return; } @@ -158,10 +222,21 @@ impl<'a> SplitHeaderMut<'a> { mem::swap(self.table.get_unchecked_mut(bucket), &mut index); } } - // Safety: Bucket index must be in range and occupied + // Safety: Bucket index must be in range and occupied. In no-table mode, `bucket` + // is the item index. unsafe fn remove_bucket(&mut self, bucket: usize) { + if !has_table(self.cap) { + // `bucket` is the item index. Swap it to the back so the caller's `pop` + // removes it; the displaced last item lands at `bucket`. + let last_index = self.items.len() - 1; + if bucket != last_index { + self.items.swap(bucket, last_index); + } + return; + } + // Remove the entry from the table - let index = mem::replace(self.table.get_unchecked_mut(bucket), usize::MAX); + let index = mem::replace(self.table.get_unchecked_mut(bucket), u32::MAX) as usize; // Unshift any displaced buckets, so the table is valid again self.unshift(bucket); @@ -175,7 +250,7 @@ impl<'a> SplitHeaderMut<'a> { // Update it to point to the location where that item will be // after we swap it. - *self.table.get_unchecked_mut(bucket_to_update) = index; + *self.table.get_unchecked_mut(bucket_to_update) = index as u32; // Swap the element to be removed to the back self.items.swap(index, last_index); @@ -188,17 +263,23 @@ trait HeaderRef<'a>: ThinRefExt<'a, Header> { // Safety: pointers to the end of structs are allowed unsafe { self.ptr().add(1).cast() } } - fn hashes_ptr(&self) -> *const usize { + fn hashes_ptr(&self) -> *const u32 { // Safety: pointers to the end of structs are allowed - unsafe { self.items_ptr().add(self.cap).cast() } + unsafe { self.items_ptr().add(self.cap()).cast() } } fn split(&self) -> SplitHeader<'a> { + let cap = self.cap(); // Safety: Header `len` and `cap` must be accurate unsafe { SplitHeader { - cap: self.cap, - items: std::slice::from_raw_parts(self.items_ptr(), self.len), - table: std::slice::from_raw_parts(self.hashes_ptr(), hash_capacity(self.cap)), + cap, + items: std::slice::from_raw_parts(self.items_ptr(), self.len()), + // Small objects carry no table: present an empty slice. + table: if has_table(cap) { + std::slice::from_raw_parts(self.hashes_ptr(), hash_capacity(cap)) + } else { + &[] + }, } } } @@ -209,21 +290,27 @@ trait HeaderMut<'a>: ThinMutExt<'a, Header> { // Safety: pointers to the end of structs are allowed unsafe { self.ptr_mut().add(1).cast() } } - fn hashes_ptr_mut(&mut self) -> *mut usize { + fn hashes_ptr_mut(&mut self) -> *mut u32 { // Safety: pointers to the end of structs are allowed - unsafe { self.items_ptr_mut().add(self.cap).cast() } + unsafe { self.items_ptr_mut().add(self.cap()).cast() } } fn split_mut(mut self) -> SplitHeaderMut<'a> { // Safety: Header `len` and `cap` must be accurate - let len = self.len; - let hash_cap = hash_capacity(self.cap); + let cap = self.cap(); + let len = self.len(); let item_ptr = self.items_ptr_mut(); - let hash_ptr = self.hashes_ptr_mut(); unsafe { + // Small objects carry no table: present an empty slice. + let table: &mut [u32] = if has_table(cap) { + let hash_ptr = self.hashes_ptr_mut(); + std::slice::from_raw_parts_mut(hash_ptr, hash_capacity(cap)) + } else { + &mut [] + }; SplitHeaderMut { - cap: self.cap, + cap, items: std::slice::from_raw_parts_mut(item_ptr as *mut _, len), - table: std::slice::from_raw_parts_mut(hash_ptr as *mut _, hash_cap), + table, } } } @@ -235,25 +322,26 @@ trait HeaderMut<'a>: ThinMutExt<'a, Header> { // Safety: Object must not be empty unsafe fn pop(&mut self) -> (IString, IValue) { - self.len -= 1; - let item = self.items_ptr_mut().add(self.len).read(); + let new_len = self.len() - 1; + self.set_len(new_len); + let item = self.items_ptr_mut().add(new_len).read(); (item.key, item.value) } unsafe fn push(&mut self, key: IString, value: IValue) -> usize { + let res = self.len(); self.items_ptr_mut() - .add(self.len) + .add(res) .write(KeyValuePair { key, value }); - let res = self.len; - self.len += 1; + self.set_len(res + 1); res } fn clear(&mut self) { // Clear the table for item in self.reborrow().split_mut().table { - *item = usize::MAX; + *item = u32::MAX; } // Drop the items - while self.len > 0 { + while self.len() > 0 { // Safety: not empty unsafe { self.pop(); @@ -314,7 +402,11 @@ impl<'a> OccupiedEntry<'a> { // Safety: Indices are known to be in range let split = self.header.split(); unsafe { - let index = *split.table.get_unchecked(self.bucket); + let index = if has_table(split.cap) { + *split.table.get_unchecked(self.bucket) as usize + } else { + self.bucket + }; let kvp = split.items.get_unchecked(index); (&kvp.key, &kvp.value) } @@ -323,7 +415,11 @@ impl<'a> OccupiedEntry<'a> { // Safety: Indices are known to be in range let split = self.header.reborrow().split_mut(); unsafe { - let index = *split.table.get_unchecked(self.bucket); + let index = if has_table(split.cap) { + *split.table.get_unchecked(self.bucket) as usize + } else { + self.bucket + }; let kvp = split.items.get_unchecked_mut(index); (&kvp.key, &mut kvp.value) } @@ -332,7 +428,11 @@ impl<'a> OccupiedEntry<'a> { // Safety: Indices are known to be in range let split = self.header.split_mut(); unsafe { - let index = *split.table.get_unchecked(self.bucket); + let index = if has_table(split.cap) { + *split.table.get_unchecked(self.bucket) as usize + } else { + self.bucket + }; let kvp = split.items.get_unchecked_mut(index); (&kvp.key, &mut kvp.value) } @@ -521,34 +621,42 @@ pub struct IObject(pub(crate) IValue); value_subtype_impls!(IObject, into_object, as_object, as_object_mut); -static EMPTY_HEADER: Header = Header { len: 0, cap: 0 }; +static EMPTY_HEADER: Header = Header { packed: 0 }; impl IObject { fn layout(cap: usize) -> Result { - Ok(Layout::new::
() + let layout = Layout::new::
() .extend(Layout::array::(cap)?)? - .0 - .extend(Layout::array::(hash_capacity(cap))?)? - .0 - .pad_to_align()) + .0; + // Small objects carry no hash table. + let layout = if has_table(cap) { + layout.extend(Layout::array::(hash_capacity(cap))?)?.0 + } else { + layout + }; + Ok(layout.pad_to_align()) } - fn alloc(cap: usize) -> *mut Header { + fn alloc(cap: usize) -> Result<*mut Header, IJsonError> { unsafe { - let hd = alloc(Self::layout(cap).unwrap()).cast::
(); - std::ptr::write(hd, Header { len: 0, cap }); - let mut hd_mut = ThinMut::new(hd); - let hash_ptr = hd_mut.hashes_ptr_mut(); - for i in 0..hash_capacity(cap) { - hash_ptr.add(i).write(usize::MAX); + let hd = alloc(Self::layout(cap).map_err(|_| AllocError)?).cast::
(); + std::ptr::write(hd, Header::new(0, cap)?); + if has_table(cap) { + let mut hd_mut = ThinMut::new(hd); + let hash_ptr = hd_mut.hashes_ptr_mut(); + for i in 0..hash_capacity(cap) { + hash_ptr.add(i).write(u32::MAX); + } } - hd + Ok(hd) } } fn dealloc(ptr: *mut Header) { unsafe { - let layout = Self::layout((*ptr).cap).unwrap(); + // SAFETY: `cap` is read from a live header that was successfully allocated, so its + // layout was valid then and is valid now; the unwrap cannot fail. + let layout = Self::layout((*ptr).cap()).unwrap(); dealloc(ptr.cast(), layout); } } @@ -561,12 +669,18 @@ impl IObject { /// Constructs a new `IObject` with the specified capacity. At least that many entries /// can be added to the object without reallocating. + /// + /// # Errors + /// Returns an `AllocError` if memory allocation fails or the capacity exceeds the + /// 32-bit limit. #[must_use] - pub fn with_capacity(cap: usize) -> Self { + pub fn with_capacity(cap: usize) -> Result { if cap == 0 { - Self::new() + Ok(Self::new()) } else { - Self(unsafe { IValue::new_ptr(Self::alloc(cap).cast(), TypeTag::ObjectOrTrue) }) + Ok(Self(unsafe { + IValue::new_ptr(Self::alloc(cap)?.cast(), TypeTag::ObjectOrTrue) + })) } } @@ -586,12 +700,12 @@ impl IObject { /// can hold without reallocating. #[must_use] pub fn capacity(&self) -> usize { - self.header().cap + self.header().cap() } /// Returns the number of entries currently stored in the object. #[must_use] pub fn len(&self) -> usize { - self.header().len + self.header().len() } /// Returns `true` if the object is empty. #[must_use] @@ -599,8 +713,8 @@ impl IObject { self.len() == 0 } - fn resize_internal(&mut self, cap: usize) { - let old_obj = mem::replace(self, Self::with_capacity(cap)); + fn resize_internal(&mut self, cap: usize) -> Result<(), IJsonError> { + let old_obj = mem::replace(self, Self::with_capacity(cap)?); if !self.is_static() { unsafe { let mut hd = self.header_mut(); @@ -612,31 +726,41 @@ impl IObject { } } } + Ok(()) } /// Reserves space for at least this many additional entries. - pub fn reserve(&mut self, additional: usize) { + /// + /// # Errors + /// Returns an `AllocError` if memory allocation fails. + pub fn reserve(&mut self, additional: usize) -> Result<(), IJsonError> { let hd = self.header(); - let current_capacity = hd.cap; - let desired_capacity = hd.len.checked_add(additional).unwrap(); + let current_capacity = hd.cap(); + let desired_capacity = hd.len().checked_add(additional).ok_or(AllocError)?; if current_capacity >= desired_capacity { - return; + return Ok(()); } - self.resize_internal(cmp::max(current_capacity * 2, desired_capacity.max(4))); + self.resize_internal(cmp::max(current_capacity * 2, desired_capacity.max(4))) } /// Returns a view of an entry within this object. - pub fn entry(&mut self, key: impl Into) -> Entry<'_> { - self.reserve(1); + /// + /// # Errors + /// Returns an `AllocError` if reserving space for the entry fails. + pub fn entry(&mut self, key: impl Into) -> Result, IJsonError> { + self.reserve(1)?; // Safety: cannot be static after reserving space - unsafe { self.header_mut().entry(key.into()) } + Ok(unsafe { self.header_mut().entry(key.into()) }) } /// Returns a view of an entry within this object, whilst avoiding /// cloning the key if the entry is already occupied. - pub fn entry_or_clone(&mut self, key: &IString) -> Entry<'_> { - self.reserve(1); + /// + /// # Errors + /// Returns an `AllocError` if reserving space for the entry fails. + pub fn entry_or_clone(&mut self, key: &IString) -> Result, IJsonError> { + self.reserve(1)?; // Safety: cannot be static after reserving space - unsafe { self.header_mut().entry_or_clone(key) } + Ok(unsafe { self.header_mut().entry_or_clone(key) }) } /// Returns an iterator over references to the keys in this object. pub fn keys(&self) -> impl Iterator { @@ -709,12 +833,19 @@ impl IObject { /// Inserts a new value into this object with the specified key. If a value already /// existed at this key, that value is replaced and returend. - pub fn insert(&mut self, k: impl Into, v: impl Into) -> Option { - match self.entry(k) { - Entry::Occupied(mut occ) => Some(occ.insert(v)), + /// + /// # Errors + /// Returns an `AllocError` if memory allocation fails. + pub fn insert( + &mut self, + k: impl Into, + v: impl Into, + ) -> Result, IJsonError> { + match self.entry(k)? { + Entry::Occupied(mut occ) => Ok(Some(occ.insert(v))), Entry::Vacant(vac) => { vac.insert(v); - None + Ok(None) } } } @@ -733,7 +864,8 @@ impl IObject { /// Shrinks the memory allocation used by the object such that its /// capacity becomes equal to its length. pub fn shrink_to_fit(&mut self) { - self.resize_internal(self.len()); + // Shrinking never grows capacity, so this cannot fail. + self.resize_internal(self.len()).unwrap(); } /// Calls the specified function for each entry in the object. Each entry @@ -745,7 +877,7 @@ impl IObject { // Safety: not static let mut hd = unsafe { self.header_mut() }; let mut index = 0; - while index < hd.len { + while index < hd.len() { let mut split = hd.reborrow().split_mut(); // Safety: Indices are in range @@ -764,9 +896,11 @@ impl IObject { } pub(crate) fn clone_impl(&self) -> IValue { - let mut res = Self::with_capacity(self.len()); + // Cloning an existing (valid) object never exceeds its capacity, so this + // cannot fail short of the global allocator aborting. + let mut res = Self::with_capacity(self.len()).unwrap(); for (k, v) in self.iter() { - res.insert(k.clone(), v.clone()); + res.insert(k.clone(), v.clone()).unwrap(); } res.0 @@ -785,6 +919,8 @@ impl IObject { if self.is_static() { 0 } else { + // Layout of a live object's own capacity; it allocated successfully, so + // recomputing its layout cannot fail. Self::layout(self.capacity()).unwrap().size() + self .iter() @@ -851,21 +987,22 @@ impl Hash for IObject { } } -impl, V: Into> Extend<(K, V)> for IObject { - fn extend>(&mut self, iter: T) { +impl, V: Into> TryExtend<(K, V)> for IObject { + fn try_extend(&mut self, iter: impl IntoIterator) -> Result<(), IJsonError> { let iter = iter.into_iter(); - self.reserve(iter.size_hint().0); + self.reserve(iter.size_hint().0)?; for (k, v) in iter { - self.insert(k, v); + self.insert(k, v)?; } + Ok(()) } } -impl, V: Into> FromIterator<(K, V)> for IObject { - fn from_iter>(iter: T) -> Self { +impl, V: Into> TryFromIterator<(K, V)> for IObject { + fn try_from_iter>(iter: T) -> Result { let mut res = IObject::new(); - res.extend(iter); - res + res.try_extend(iter)?; + Ok(res) } } @@ -920,7 +1057,9 @@ impl ObjectIndex for &str { } fn index_or_insert(self, v: &mut IObject) -> &mut IValue { - v.entry(IString::intern(self)).or_insert(IValue::NULL) + v.entry(IString::intern(self)) + .unwrap() + .or_insert(IValue::NULL) } fn remove(self, v: &mut IObject) -> Option<(IString, IValue)> { @@ -937,7 +1076,11 @@ impl ObjectIndex for &IString { if let Ok(bucket) = hd.find_bucket(self) { // Safety: Bucket index is valid unsafe { - let index = *hd.table.get_unchecked(bucket); + let index = if has_table(hd.cap) { + *hd.table.get_unchecked(bucket) as usize + } else { + bucket + }; let item = hd.items.get_unchecked(index); Some((&item.key, &item.value)) } @@ -955,7 +1098,11 @@ impl ObjectIndex for &IString { if let Ok(bucket) = hd.as_ref().find_bucket(self) { // Safety: Bucket index is valid unsafe { - let index = *hd.table.get_unchecked(bucket); + let index = if has_table(hd.cap) { + *hd.table.get_unchecked(bucket) as usize + } else { + bucket + }; let item = hd.items.get_unchecked_mut(index); Some((&item.key, &mut item.value)) } @@ -966,7 +1113,7 @@ impl ObjectIndex for &IString { } fn index_or_insert(self, v: &mut IObject) -> &mut IValue { - v.entry_or_clone(self).or_insert(IValue::NULL) + v.entry_or_clone(self).unwrap().or_insert(IValue::NULL) } fn remove(self, v: &mut IObject) -> Option<(IString, IValue)> { @@ -1069,19 +1216,21 @@ impl<'a> IntoIterator for &'a mut IObject { } } -impl, V: Into> From> for IObject { - fn from(other: HashMap) -> Self { - let mut res = Self::with_capacity(other.len()); - res.extend(other.into_iter().map(|(k, v)| (k.into(), v.into()))); - res +impl, V: Into> TryFrom> for IObject { + type Error = IJsonError; + fn try_from(other: HashMap) -> Result { + let mut res = Self::with_capacity(other.len())?; + res.try_extend(other)?; + Ok(res) } } -impl, V: Into> From> for IObject { - fn from(other: BTreeMap) -> Self { - let mut res = Self::with_capacity(other.len()); - res.extend(other.into_iter().map(|(k, v)| (k.into(), v.into()))); - res +impl, V: Into> TryFrom> for IObject { + type Error = IJsonError; + fn try_from(other: BTreeMap) -> Result { + let mut res = Self::with_capacity(other.len())?; + res.try_extend(other)?; + Ok(res) } } @@ -1111,7 +1260,9 @@ impl Defrag for IObject { } unsafe { let ptr = self.0.ptr().cast::
(); - let new_ptr = defrag_allocator.realloc_ptr(ptr, Self::layout((*ptr).cap).unwrap()); + // SAFETY: `cap` is read from a live header that already allocated with this layout, + // so recomputing it cannot fail. + let new_ptr = defrag_allocator.realloc_ptr(ptr, Self::layout((*ptr).cap()).unwrap()); self.0.set_ptr(new_ptr.cast()); } self @@ -1121,15 +1272,33 @@ impl Defrag for IObject { #[cfg(test)] mod tests { use super::*; + use crate::convert::TryCollect; #[mockalloc::test] fn can_create() { let x = IObject::new(); - let y = IObject::with_capacity(10); + let y = IObject::with_capacity(10).unwrap(); assert_eq!(x, y); } + #[test] + fn header_is_packed_to_8_bytes() { + assert_eq!(mem::size_of::
(), 8); + } + + #[test] + fn small_object_has_no_table() { + assert!(!has_table(4)); + assert_eq!(IObject::layout(4).unwrap().size(), 72); + } + + #[test] + fn large_object_has_table() { + assert!(has_table(16)); + assert_eq!(IObject::layout(16).unwrap().size(), 344); + } + #[mockalloc::test] fn can_collect() { let x = vec![ @@ -1137,7 +1306,7 @@ mod tests { ("b", IValue::TRUE), ("c", IValue::FALSE), ]; - let y: IObject = x.into_iter().collect(); + let y: IObject = x.into_iter().try_collect().unwrap(); assert_eq!(y, y.clone()); assert_eq!(y.len(), 3); @@ -1149,9 +1318,9 @@ mod tests { #[mockalloc::test] fn can_insert() { let mut x = IObject::new(); - x.insert("a", IValue::NULL); - x.insert("b", IValue::TRUE); - x.insert("c", IValue::FALSE); + x.insert("a", IValue::NULL).unwrap(); + x.insert("b", IValue::TRUE).unwrap(); + x.insert("c", IValue::FALSE).unwrap(); assert_eq!(x.len(), 3); assert_eq!(x["a"], IValue::NULL); @@ -1162,10 +1331,10 @@ mod tests { #[mockalloc::test] fn can_nest() { let mut x = IObject::new(); - x.insert("a", IValue::NULL); - x.insert("b", x.clone()); - x.insert("c", IValue::FALSE); - x.insert("d", x.clone()); + x.insert("a", IValue::NULL).unwrap(); + x.insert("b", x.clone()).unwrap(); + x.insert("c", IValue::FALSE).unwrap(); + x.insert("d", x.clone()).unwrap(); assert_eq!(x.len(), 4); assert_eq!(x["a"], IValue::NULL); @@ -1181,7 +1350,7 @@ mod tests { ("b", IValue::TRUE), ("c", IValue::FALSE), ]; - let mut y: IObject = x.into_iter().collect(); + let mut y: IObject = x.into_iter().try_collect().unwrap(); assert_eq!(y.len(), 3); assert_eq!(y.capacity(), 4); @@ -1220,7 +1389,7 @@ mod tests { if x.contains_key(&k) { x.remove(&k); } else { - x.insert(k, op); + x.insert(k, op).unwrap(); } } assert_eq!(x, IObject::new()); diff --git a/src/ser.rs b/src/ser.rs index e951d4b..19ec8ac 100644 --- a/src/ser.rs +++ b/src/ser.rs @@ -293,7 +293,8 @@ impl Serializer for ValueSerializer { T: ?Sized + Serialize, { let mut obj = IObject::new(); - obj.insert(variant, value.serialize(self)?); + obj.insert(variant, value.serialize(self)?) + .map_err(|e| Error::custom(e.to_string()))?; Ok(obj.into()) } @@ -345,7 +346,8 @@ impl Serializer for ValueSerializer { fn serialize_map(self, len: Option) -> Result { Ok(SerializeObject { - object: IObject::with_capacity(len.unwrap_or(0)), + object: IObject::with_capacity(len.unwrap_or(0)) + .map_err(|e| Error::custom(e.to_string()))?, next_key: None, }) } @@ -367,7 +369,7 @@ impl Serializer for ValueSerializer { ) -> Result { Ok(SerializeObjectVariant { name: variant.into(), - object: IObject::with_capacity(len), + object: IObject::with_capacity(len).map_err(|e| Error::custom(e.to_string()))?, }) } } @@ -458,7 +460,9 @@ impl SerializeTupleVariant for SerializeArrayVariant { fn end(self) -> Result { let mut object = IObject::new(); - object.insert(self.name, self.array); + object + .insert(self.name, self.array) + .map_err(|e| Error::custom(e.to_string()))?; Ok(object.into()) } @@ -486,7 +490,9 @@ impl SerializeMap for SerializeObject { .next_key .take() .expect("serialize_value called before serialize_key"); - self.object.insert(key, value.serialize(ValueSerializer)?); + self.object + .insert(key, value.serialize(ValueSerializer)?) + .map_err(|e| Error::custom(e.to_string()))?; Ok(()) } @@ -699,13 +705,17 @@ impl SerializeStructVariant for SerializeObjectVariant { where T: ?Sized + Serialize, { - self.object.insert(key, value.serialize(ValueSerializer)?); + self.object + .insert(key, value.serialize(ValueSerializer)?) + .map_err(|e| Error::custom(e.to_string()))?; Ok(()) } fn end(self) -> Result { let mut object = IObject::new(); - object.insert(self.name, self.object); + object + .insert(self.name, self.object) + .map_err(|e| Error::custom(e.to_string()))?; Ok(object.into()) } } diff --git a/src/value.rs b/src/value.rs index 3ce77a5..7c4453c 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1030,12 +1030,12 @@ impl From for IValue { typed_conversions! { INumber: i8, u8, i16, u16, i32, u32, i64, u64, isize, usize; IString: String, &String, &mut String, &str, &mut str; - IObject: - HashMap where (K: Into, V: Into), - BTreeMap where (K: Into, V: Into); } typed_conversions_fallible! { INumber: f16, bf16, f32, f64; + IObject: + HashMap where (K: Into, V: Into), + BTreeMap where (K: Into, V: Into); IArray: Vec where (T: Into + crate::array::private::Sealed), Vec, Vec, Vec, Vec, Vec, Vec, @@ -1261,7 +1261,7 @@ mod tests { #[mockalloc::test] fn test_object() { for v in 4..20 { - let mut o: IObject = (0..v).map(|i| (i.to_string(), i)).collect(); + let mut o: IObject = (0..v).map(|i| (i.to_string(), i)).try_collect().unwrap(); let mut x = IValue::from(o.clone()); assert!(x.is_object()); assert_eq!(x.type_(), ValueType::Object); @@ -1270,21 +1270,30 @@ mod tests { assert!(matches!(x.clone().destructure(), Destructured::Object(u) if u == o)); assert!(matches!(x.clone().destructure_ref(), DestructuredRef::Object(u) if *u == o)); assert!(matches!(x.clone().destructure_mut(), DestructuredMut::Object(u) if *u == o)); + // Layout: packed 8-byte header + KeyValuePair array + (only when cap > 8) + // a u32 hash table, padded to 8-byte alignment. Small objects carry no table. + let cap = o.capacity(); + let table_bytes = if cap > 8 { + 5 * cap / 4 * mem::size_of::() + } else { + 0 + }; + let raw = mem::size_of::() + + cap * (mem::size_of::() + mem::size_of::()) + + table_bytes; assert_eq!( x.mem_allocated(), o.iter() .map(|(k, v)| k.mem_allocated() + v.mem_allocated()) .sum::() - + o.capacity() * (mem::size_of::() + mem::size_of::()) - + 5 * o.capacity() / 4 * mem::size_of::() - + 2 * mem::size_of::() + + ((raw + 7) & !7) ); } } #[mockalloc::test] fn test_into_object_for_object() { - let o: IObject = (0..10).map(|i| (i.to_string(), i)).collect(); + let o: IObject = (0..10).map(|i| (i.to_string(), i)).try_collect().unwrap(); let x = IValue::from(o.clone()); assert_eq!(x.into_object(), Ok(o));