json/
object.rs

1use std::{ ptr, mem, str, slice, fmt };
2use std::ops::{ Index, IndexMut, Deref };
3use std::iter::FromIterator;
4
5use crate::codegen::{ DumpGenerator, Generator, PrettyGenerator };
6use crate::value::JsonValue;
7
8const KEY_BUF_LEN: usize = 32;
9static NULL: JsonValue = JsonValue::Null;
10
11// FNV-1a implementation
12//
13// While the `Object` is implemented as a binary tree, not a hash table, the
14// order in which the tree is balanced makes absolutely no difference as long
15// as there is a deterministic left / right ordering with good spread.
16// Comparing a hashed `u64` is faster than comparing `&str` or even `&[u8]`,
17// for larger objects this yields non-trivial performance benefits.
18//
19// Additionally this "randomizes" the keys a bit. Should the keys in an object
20// be inserted in alphabetical order (an example of such a use case would be
21// using an object as a store for entries by ids, where ids are sorted), this
22// will prevent the tree from being constructed in a way where the same branch
23// of each node is always used, effectively producing linear lookup times. Bad!
24//
25// Example:
26//
27// ```
28// println!("{}", hash_key(b"10000056"));
29// println!("{}", hash_key(b"10000057"));
30// println!("{}", hash_key(b"10000058"));
31// println!("{}", hash_key(b"10000059"));
32// ```
33//
34// Produces:
35//
36// ```
37// 15043794053238616431  <-- 2nd
38// 15043792953726988220  <-- 1st
39// 15043800650308385697  <-- 4th
40// 15043799550796757486  <-- 3rd
41// ```
42#[inline]
43fn hash_key(key: &[u8]) -> u64 {
44    let mut hash: u64 = 0xcbf29ce484222325;
45    for byte in key {
46        hash ^= *byte as u64;
47        hash = hash.wrapping_mul(0x100000001b3);
48    }
49    hash
50}
51
52struct Key {
53    // Internal buffer to store keys that fit within `KEY_BUF_LEN`,
54    // otherwise this field will contain garbage.
55    pub buf: [u8; KEY_BUF_LEN],
56
57    // Length of the key in bytes.
58    pub len: usize,
59
60    // Cached raw pointer to the key, so that we can cheaply construct
61    // a `&str` slice from the `Node` without checking if the key is
62    // allocated separately on the heap, or in the `key_buf`.
63    pub ptr: *mut u8,
64
65    // A hash of the key, explanation below.
66    pub hash: u64,
67}
68
69impl Key {
70    #[inline]
71    fn new(hash: u64, len: usize) -> Self {
72        Key {
73            buf: [0; KEY_BUF_LEN],
74            len: len,
75            ptr: ptr::null_mut(),
76            hash: hash
77        }
78    }
79
80    #[inline]
81    fn as_bytes(&self) -> &[u8] {
82        unsafe {
83            slice::from_raw_parts(self.ptr, self.len)
84        }
85    }
86
87    #[inline]
88    fn as_str(&self) -> &str {
89        unsafe {
90            str::from_utf8_unchecked(self.as_bytes())
91        }
92    }
93
94    // The `buf` on the `Key` can only be filled after the struct
95    // is already on the `Vec`'s heap (along with the `Node`).
96    // For that reason it's not set in `Key::new` but only after
97    // the `Node` is created and allocated.
98    #[inline]
99    fn attach(&mut self, key: &[u8]) {
100        if self.len <= KEY_BUF_LEN {
101            unsafe {
102                ptr::copy_nonoverlapping(
103                    key.as_ptr(),
104                    self.buf.as_mut_ptr(),
105                    self.len
106                );
107            }
108            self.ptr = self.buf.as_mut_ptr();
109        } else {
110            let mut heap = key.to_vec();
111            self.ptr = heap.as_mut_ptr();
112            mem::forget(heap);
113        }
114    }
115
116    // Since we store `Node`s on a vector, it will suffer from reallocation.
117    // Whenever that happens, `key.ptr` for short keys will turn into dangling
118    // pointers and will need to be re-cached.
119    #[inline]
120    fn fix_ptr(&mut self) {
121        if self.len <= KEY_BUF_LEN {
122            self.ptr = self.buf.as_mut_ptr();
123        }
124    }
125}
126
127// Implement `Sync` and `Send` for `Key` despite the use of raw pointers. The struct
128// itself should be memory safe.
129unsafe impl Sync for Key {}
130unsafe impl Send for Key {}
131
132// Because long keys _can_ be stored separately from the `Key` on heap,
133// it's essential to clean up the heap allocation when the `Key` is dropped.
134impl Drop for Key {
135    fn drop(&mut self) {
136        unsafe {
137            if self.len > KEY_BUF_LEN {
138                // Construct a `Vec` out of the `key_ptr`. Since the key is
139                // always allocated from a slice, the capacity is equal to length.
140                let heap = Vec::from_raw_parts(
141                    self.ptr,
142                    self.len,
143                    self.len
144                );
145
146                // Now that we have an owned `Vec<u8>`, drop it.
147                drop(heap);
148            }
149        }
150    }
151}
152
153// Just like with `Drop`, `Clone` needs a custom implementation that accounts
154// for the fact that key _can_ be separately heap allocated.
155impl Clone for Key {
156    fn clone(&self) -> Self {
157        if self.len > KEY_BUF_LEN {
158            let mut heap = self.as_bytes().to_vec();
159            let ptr = heap.as_mut_ptr();
160            mem::forget(heap);
161
162            Key {
163                buf: [0; KEY_BUF_LEN],
164                len: self.len,
165                ptr: ptr,
166                hash: self.hash,
167            }
168        } else {
169            Key {
170                buf: self.buf,
171                len: self.len,
172                ptr: ptr::null_mut(), // requires a `fix_ptr` call after `Node` is on the heap
173                hash: self.hash,
174            }
175        }
176    }
177}
178
179#[derive(Clone)]
180struct Node {
181    // String-esque key abstraction
182    pub key: Key,
183
184    // Value stored.
185    pub value: JsonValue,
186
187    // Store vector index pointing to the `Node` for which `key_hash` is smaller
188    // than that of this `Node`.
189    // Will default to 0 as root node can't be referenced anywhere else.
190    pub left: usize,
191
192    // Same as above but for `Node`s with hash larger than this one. If the
193    // hash is the same, but keys are different, the lookup will default
194    // to the right branch as well.
195    pub right: usize,
196}
197
198impl fmt::Debug for Node {
199    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200        fmt::Debug::fmt(&(self.key.as_str(), &self.value, self.left, self.right), f)
201    }
202}
203
204impl PartialEq for Node {
205    fn eq(&self, other: &Node) -> bool {
206        self.key.hash       == other.key.hash       &&
207        self.key.as_bytes() == other.key.as_bytes() &&
208        self.value          == other.value
209    }
210}
211
212impl Node {
213    #[inline]
214    fn new(value: JsonValue, hash: u64, len: usize) -> Node {
215        Node {
216            key: Key::new(hash, len),
217            value: value,
218            left: 0,
219            right: 0,
220        }
221    }
222}
223
224/// A binary tree implementation of a string -> `JsonValue` map. You normally don't
225/// have to interact with instances of `Object`, much more likely you will be
226/// using the `JsonValue::Object` variant, which wraps around this struct.
227#[derive(Debug)]
228pub struct Object {
229    store: Vec<Node>
230}
231
232impl Object {
233    /// Create a new, empty instance of `Object`. Empty `Object` performs no
234    /// allocation until a value is inserted into it.
235    #[inline(always)]
236    pub fn new() -> Self {
237        Object {
238            store: Vec::new()
239        }
240    }
241
242    /// Create a new `Object` with memory preallocated for `capacity` number
243    /// of entries.
244    #[inline(always)]
245    pub fn with_capacity(capacity: usize) -> Self {
246        Object {
247            store: Vec::with_capacity(capacity)
248        }
249    }
250
251    #[inline]
252    fn node_at_index_mut(&mut self, index: usize) -> *mut Node {
253        unsafe { self.store.as_mut_ptr().offset(index as isize) }
254    }
255
256    #[inline(always)]
257    fn add_node(&mut self, key: &[u8], value: JsonValue, hash: u64) -> usize {
258        let index = self.store.len();
259
260        if index < self.store.capacity() {
261            // Because we've just checked the capacity, we can avoid
262            // using `push`, and instead do unsafe magic to memcpy
263            // the new node at the correct index without additional
264            // capacity or bound checks.
265            unsafe {
266                let node = Node::new(value, hash, key.len());
267                self.store.set_len(index + 1);
268
269                // To whomever gets concerned: I got better results with
270                // copy than write. Difference in benchmarks wasn't big though.
271                ptr::copy_nonoverlapping(
272                    &node as *const Node,
273                    self.store.as_mut_ptr().offset(index as isize),
274                    1,
275                );
276
277                // Since the Node has been copied, we need to forget about
278                // the owned value, else we may run into use after free.
279                mem::forget(node);
280            }
281
282            unsafe { self.store.get_unchecked_mut(index).key.attach(key) };
283        } else {
284            self.store.push(Node::new(value, hash, key.len()));
285
286            unsafe { self.store.get_unchecked_mut(index).key.attach(key) };
287
288            // Index up to the index (old length), we don't need to fix
289            // anything on the Node that just got pushed.
290            for node in self.store.iter_mut().take(index) {
291                node.key.fix_ptr();
292            }
293        }
294
295        index
296    }
297
298    /// Insert a new entry, or override an existing one. Note that `key` has
299    /// to be a `&str` slice and not an owned `String`. The internals of
300    /// `Object` will handle the heap allocation of the key if needed for
301    /// better performance.
302    #[inline]
303    pub fn insert(&mut self, key: &str, value: JsonValue) {
304        self.insert_index(key, value);
305    }
306
307    pub(crate) fn insert_index(&mut self, key: &str, value: JsonValue) -> usize {
308        let key = key.as_bytes();
309        let hash = hash_key(key);
310
311        if self.store.len() == 0 {
312            self.store.push(Node::new(value, hash, key.len()));
313            self.store[0].key.attach(key);
314            return 0;
315        }
316
317        let mut node = unsafe { &mut *self.node_at_index_mut(0) };
318        let mut parent = 0;
319
320        loop {
321            if hash == node.key.hash && key == node.key.as_bytes() {
322                node.value = value;
323                return parent;
324            } else if hash < node.key.hash {
325                if node.left != 0 {
326                    parent = node.left;
327                    node = unsafe { &mut *self.node_at_index_mut(node.left) };
328                    continue;
329                }
330                let index = self.add_node(key, value, hash);
331                self.store[parent].left = index;
332
333                return index;
334            } else {
335                if node.right != 0 {
336                    parent = node.right;
337                    node = unsafe { &mut *self.node_at_index_mut(node.right) };
338                    continue;
339                }
340                let index = self.add_node(key, value, hash);
341                self.store[parent].right = index;
342
343                return index;
344            }
345        }
346    }
347
348    #[inline]
349    pub(crate) fn override_at(&mut self, index: usize, value: JsonValue) {
350        self.store[index].value = value;
351    }
352
353    #[inline]
354    #[deprecated(since="0.11.11", note="Was only meant for internal use")]
355    pub fn override_last(&mut self, value: JsonValue) {
356        if let Some(node) = self.store.last_mut() {
357            node.value = value;
358        }
359    }
360
361    pub fn get(&self, key: &str) -> Option<&JsonValue> {
362        if self.store.len() == 0 {
363            return None;
364        }
365
366        let key = key.as_bytes();
367        let hash = hash_key(key);
368
369        let mut node = unsafe { self.store.get_unchecked(0) };
370
371        loop {
372            if hash == node.key.hash && key == node.key.as_bytes() {
373                return Some(&node.value);
374            } else if hash < node.key.hash {
375                if node.left == 0 {
376                    return None;
377                }
378                node = unsafe { self.store.get_unchecked(node.left) };
379            } else {
380                if node.right == 0 {
381                    return None;
382                }
383                node = unsafe { self.store.get_unchecked(node.right) };
384            }
385        }
386    }
387
388    pub fn get_mut(&mut self, key: &str) -> Option<&mut JsonValue> {
389        if self.store.len() == 0 {
390            return None;
391        }
392
393        let key = key.as_bytes();
394        let hash = hash_key(key);
395
396        let mut index = 0;
397        {
398            let mut node = unsafe { self.store.get_unchecked(0) };
399
400            loop {
401                if hash == node.key.hash && key == node.key.as_bytes() {
402                    break;
403                } else if hash < node.key.hash {
404                    if node.left == 0 {
405                        return None;
406                    }
407                    index = node.left;
408                    node = unsafe { self.store.get_unchecked(node.left) };
409                } else {
410                    if node.right == 0 {
411                        return None;
412                    }
413                    index = node.right;
414                    node = unsafe { self.store.get_unchecked(node.right) };
415                }
416            }
417        }
418
419        let node = unsafe { self.store.get_unchecked_mut(index) };
420
421        Some(&mut node.value)
422    }
423
424    /// Attempts to remove the value behind `key`, if successful
425    /// will return the `JsonValue` stored behind the `key`.
426    pub fn remove(&mut self, key: &str) -> Option<JsonValue> {
427        if self.store.len() == 0 {
428            return None;
429        }
430
431        let key = key.as_bytes();
432        let hash = hash_key(key);
433        let mut index = 0;
434
435        {
436            let mut node = unsafe { self.store.get_unchecked(0) };
437
438            // Try to find the node
439            loop {
440                if hash == node.key.hash && key == node.key.as_bytes() {
441                    break;
442                } else if hash < node.key.hash {
443                    if node.left == 0 {
444                        return None;
445                    }
446                    index = node.left;
447                    node = unsafe { self.store.get_unchecked(node.left) };
448                } else {
449                    if node.right == 0 {
450                        return None;
451                    }
452                    index = node.right;
453                    node = unsafe { self.store.get_unchecked(node.right) };
454                }
455            }
456        }
457
458        // Removing a node would screw the tree badly, it's easier to just
459        // recreate it. This is a very costly operation, but removing nodes
460        // in JSON shouldn't happen very often if at all. Optimizing this
461        // can wait for better times.
462        let mut new_object = Object::with_capacity(self.store.len() - 1);
463        let mut removed = None;
464
465        for (i, node) in self.store.iter_mut().enumerate() {
466            if i == index {
467                // Rust doesn't like us moving things from `node`, even if
468                // it is owned. Replace fixes that.
469                removed = Some(mem::replace(&mut node.value, JsonValue::Null));
470            } else {
471                let value = mem::replace(&mut node.value, JsonValue::Null);
472
473                new_object.insert(node.key.as_str(), value);
474            }
475        }
476
477        mem::swap(self, &mut new_object);
478
479        removed
480    }
481
482    #[inline(always)]
483    pub fn len(&self) -> usize {
484        self.store.len()
485    }
486
487    #[inline(always)]
488    pub fn is_empty(&self) -> bool {
489        self.store.is_empty()
490    }
491
492    /// Wipe the `Object` clear. The capacity will remain untouched.
493    pub fn clear(&mut self) {
494        self.store.clear();
495    }
496
497    #[inline(always)]
498    pub fn iter(&self) -> Iter {
499        Iter {
500            inner: self.store.iter()
501        }
502    }
503
504    #[inline(always)]
505    pub fn iter_mut(&mut self) -> IterMut {
506        IterMut {
507            inner: self.store.iter_mut()
508        }
509    }
510
511    /// Prints out the value as JSON string.
512    pub fn dump(&self) -> String {
513        let mut gen = DumpGenerator::new();
514        gen.write_object(self).expect("Can't fail");
515        gen.consume()
516    }
517
518    /// Pretty prints out the value as JSON string. Takes an argument that's
519    /// number of spaces to indent new blocks with.
520    pub fn pretty(&self, spaces: u16) -> String {
521        let mut gen = PrettyGenerator::new(spaces);
522        gen.write_object(self).expect("Can't fail");
523        gen.consume()
524    }
525}
526
527// Custom implementation of `Clone`, as new heap allocation means
528// we have to fix key pointers everywhere!
529impl Clone for Object {
530    fn clone(&self) -> Self {
531        let mut store = self.store.clone();
532
533        for node in store.iter_mut() {
534            node.key.fix_ptr();
535        }
536
537        Object {
538            store: store
539        }
540    }
541}
542
543impl<K: AsRef<str>, V: Into<JsonValue>> FromIterator<(K, V)> for Object {
544    fn from_iter<I: IntoIterator<Item=(K, V)>>(iter: I) -> Self {
545        let iter = iter.into_iter();
546        let mut object = Object::with_capacity(iter.size_hint().0);
547
548        for (key, value) in iter {
549            object.insert(key.as_ref(), value.into());
550        }
551
552        object
553    }
554}
555
556// Because keys can inserted in different order, the safe way to
557// compare `Object`s is to iterate over one and check if the other
558// has all the same keys.
559impl PartialEq for Object {
560    fn eq(&self, other: &Object) -> bool {
561        if self.len() != other.len() {
562            return false;
563        }
564
565        for (key, value) in self.iter() {
566            match other.get(key) {
567                Some(ref other_val) => if *other_val != value { return false; },
568                None                => return false
569            }
570        }
571
572        true
573    }
574}
575
576pub struct Iter<'a> {
577    inner: slice::Iter<'a, Node>
578}
579
580impl<'a> Iter<'a> {
581    /// Create an empty iterator that always returns `None`
582    pub fn empty() -> Self {
583        Iter {
584            inner: [].iter()
585        }
586    }
587}
588
589impl<'a> Iterator for Iter<'a> {
590    type Item = (&'a str, &'a JsonValue);
591
592    #[inline(always)]
593    fn next(&mut self) -> Option<Self::Item> {
594        self.inner.next().map(|node| (node.key.as_str(), &node.value))
595    }
596}
597
598impl<'a> DoubleEndedIterator for Iter<'a> {
599    #[inline(always)]
600    fn next_back(&mut self) -> Option<Self::Item> {
601        self.inner.next_back().map(|node| (node.key.as_str(), &node.value))
602    }
603}
604
605impl<'a> ExactSizeIterator for Iter<'a> {
606    fn len(&self) -> usize {
607        self.inner.len()
608    }
609}
610
611pub struct IterMut<'a> {
612    inner: slice::IterMut<'a, Node>
613}
614
615impl<'a> IterMut<'a> {
616    /// Create an empty iterator that always returns `None`
617    pub fn empty() -> Self {
618        IterMut {
619            inner: [].iter_mut()
620        }
621    }
622}
623
624impl<'a> Iterator for IterMut<'a> {
625    type Item = (&'a str, &'a mut JsonValue);
626
627    #[inline(always)]
628    fn next(&mut self) -> Option<Self::Item> {
629        self.inner.next().map(|node| (node.key.as_str(), &mut node.value))
630    }
631}
632
633impl<'a> DoubleEndedIterator for IterMut<'a> {
634    #[inline(always)]
635    fn next_back(&mut self) -> Option<Self::Item> {
636        self.inner.next_back().map(|node| (node.key.as_str(), &mut node.value))
637    }
638}
639
640impl<'a> ExactSizeIterator for IterMut<'a> {
641    fn len(&self) -> usize {
642        self.inner.len()
643    }
644}
645
646/// Implements indexing by `&str` to easily access object members:
647///
648/// ## Example
649///
650/// ```
651/// # #[macro_use]
652/// # extern crate json;
653/// # use json::JsonValue;
654/// #
655/// # fn main() {
656/// let value = object!{
657///     foo: "bar"
658/// };
659///
660/// if let JsonValue::Object(object) = value {
661///   assert!(object["foo"] == "bar");
662/// }
663/// # }
664/// ```
665// TODO: doc
666impl<'a> Index<&'a str> for Object {
667    type Output = JsonValue;
668
669    fn index(&self, index: &str) -> &JsonValue {
670        match self.get(index) {
671            Some(value) => value,
672            _ => &NULL
673        }
674    }
675}
676
677impl Index<String> for Object {
678    type Output = JsonValue;
679
680    fn index(&self, index: String) -> &JsonValue {
681        self.index(index.deref())
682    }
683}
684
685impl<'a> Index<&'a String> for Object {
686    type Output = JsonValue;
687
688    fn index(&self, index: &String) -> &JsonValue {
689        self.index(index.deref())
690    }
691}
692
693/// Implements mutable indexing by `&str` to easily modify object members:
694///
695/// ## Example
696///
697/// ```
698/// # #[macro_use]
699/// # extern crate json;
700/// # use json::JsonValue;
701/// #
702/// # fn main() {
703/// let value = object!{};
704///
705/// if let JsonValue::Object(mut object) = value {
706///   object["foo"] = 42.into();
707///
708///   assert!(object["foo"] == 42);
709/// }
710/// # }
711/// ```
712impl<'a> IndexMut<&'a str> for Object {
713    fn index_mut(&mut self, index: &str) -> &mut JsonValue {
714        if self.get(index).is_none() {
715            self.insert(index, JsonValue::Null);
716        }
717        self.get_mut(index).unwrap()
718    }
719}
720
721impl IndexMut<String> for Object {
722    fn index_mut(&mut self, index: String) -> &mut JsonValue {
723        self.index_mut(index.deref())
724    }
725}
726
727impl<'a> IndexMut<&'a String> for Object {
728    fn index_mut(&mut self, index: &String) -> &mut JsonValue {
729        self.index_mut(index.deref())
730    }
731}