jmt/
iterator.rs

1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! This module implements `JellyfishMerkleIterator`. Initialized with a version and a key, the
5//! iterator generates all the key-value pairs in this version of the tree, starting from the
6//! smallest key that is greater or equal to the given key, by performing a depth first traversal
7//! on the tree.
8
9use alloc::{sync::Arc, vec::Vec};
10
11use anyhow::{bail, ensure, format_err, Result};
12
13use crate::{
14    node_type::{Child, InternalNode, Node, NodeKey},
15    storage::TreeReader,
16    types::{
17        nibble::{nibble_path::NibblePath, Nibble, ROOT_NIBBLE_HEIGHT},
18        Version,
19    },
20    KeyHash, OwnedValue,
21};
22
23/// `NodeVisitInfo` keeps track of the status of an internal node during the iteration process. It
24/// indicates which ones of its children have been visited.
25#[derive(Debug)]
26struct NodeVisitInfo {
27    /// The key to this node.
28    node_key: NodeKey,
29
30    /// The node itself.
31    node: InternalNode,
32
33    /// The bitmap indicating which children exist. It is generated by running
34    /// `self.node.generate_bitmaps().0` and cached here.
35    children_bitmap: u16,
36
37    /// This integer always has exactly one 1-bit. The position of the 1-bit (from LSB) indicates
38    /// the next child to visit in the iteration process. All the ones on the left have already
39    /// been visited. All the chilren on the right (including this one) have not been visited yet.
40    next_child_to_visit: u16,
41}
42
43impl NodeVisitInfo {
44    /// Constructs a new `NodeVisitInfo` with given node key and node. `next_child_to_visit` will
45    /// be set to the leftmost child.
46    fn new(node_key: NodeKey, node: InternalNode) -> Self {
47        let (children_bitmap, _) = node.generate_bitmaps();
48        assert!(children_bitmap != 0);
49        Self {
50            node_key,
51            node,
52            children_bitmap,
53            next_child_to_visit: 1 << children_bitmap.trailing_zeros(),
54        }
55    }
56
57    /// Same as `new` but points `next_child_to_visit` to a specific location. If the child
58    /// corresponding to `next_child_to_visit` does not exist, set it to the next one on the
59    /// right.
60    fn new_next_child_to_visit(
61        node_key: NodeKey,
62        node: InternalNode,
63        next_child_to_visit: Nibble,
64    ) -> Self {
65        let (children_bitmap, _) = node.generate_bitmaps();
66        let mut next_child_to_visit = 1 << u8::from(next_child_to_visit);
67        assert!(children_bitmap >= next_child_to_visit);
68        while next_child_to_visit & children_bitmap == 0 {
69            next_child_to_visit <<= 1;
70        }
71        Self {
72            node_key,
73            node,
74            children_bitmap,
75            next_child_to_visit,
76        }
77    }
78
79    /// Whether the next child to visit is the rightmost one.
80    fn is_rightmost(&self) -> bool {
81        assert!(self.next_child_to_visit.leading_zeros() >= self.children_bitmap.leading_zeros());
82        self.next_child_to_visit.leading_zeros() == self.children_bitmap.leading_zeros()
83    }
84
85    /// Advances `next_child_to_visit` to the next child on the right.
86    fn advance(&mut self) {
87        assert!(!self.is_rightmost(), "Advancing past rightmost child.");
88        self.next_child_to_visit <<= 1;
89        while self.next_child_to_visit & self.children_bitmap == 0 {
90            self.next_child_to_visit <<= 1;
91        }
92    }
93}
94
95/// An iterator over all key-value pairs in a [`JellyfishMerkleTree`](crate::JellyfishMerkleTree).
96///
97/// Initialized with a version and a key, the iterator generates all the
98/// key-value pairs in this version of the tree, starting from the smallest key
99/// that is greater or equal to the given key, by performing a depth first
100/// traversal on the tree.
101pub struct JellyfishMerkleIterator<R> {
102    /// The storage engine from which we can read nodes using node keys.
103    reader: Arc<R>,
104
105    /// The version of the tree this iterator is running on.
106    version: Version,
107
108    /// The stack used for depth first traversal.
109    parent_stack: Vec<NodeVisitInfo>,
110
111    /// Whether the iteration has finished. Usually this can be determined by checking whether
112    /// `self.parent_stack` is empty. But in case of a tree with a single leaf, we need this
113    /// additional bit.
114    done: bool,
115}
116
117impl<R> JellyfishMerkleIterator<R>
118where
119    R: TreeReader,
120{
121    /// Constructs a new iterator. This puts the internal state in the correct position, so the
122    /// following `next` call will yield the smallest key that is greater or equal to
123    /// `starting_key`.
124    pub fn new(reader: Arc<R>, version: Version, starting_key: KeyHash) -> Result<Self> {
125        let mut parent_stack = Vec::new();
126        let mut done = false;
127
128        let mut current_node_key = NodeKey::new_empty_path(version);
129        let nibble_path = NibblePath::new(starting_key.0.to_vec());
130        let mut nibble_iter = nibble_path.nibbles();
131
132        while let Node::Internal(internal_node) = reader.get_node(&current_node_key)? {
133            let child_index = nibble_iter.next().expect("Should have enough nibbles.");
134            match internal_node.child(child_index) {
135                Some(child) => {
136                    // If this child exists, we just push the node onto stack and repeat.
137                    parent_stack.push(NodeVisitInfo::new_next_child_to_visit(
138                        current_node_key.clone(),
139                        internal_node.clone(),
140                        child_index,
141                    ));
142                    current_node_key =
143                        current_node_key.gen_child_node_key(child.version, child_index);
144                }
145                None => {
146                    let (bitmap, _) = internal_node.generate_bitmaps();
147                    if u32::from(u8::from(child_index)) < 15 - bitmap.leading_zeros() {
148                        // If this child does not exist and there's another child on the right, we
149                        // set the child on the right to be the next one to visit.
150                        parent_stack.push(NodeVisitInfo::new_next_child_to_visit(
151                            current_node_key,
152                            internal_node,
153                            child_index,
154                        ));
155                    } else {
156                        // Otherwise we have done visiting this node. Go backward and clean up the
157                        // stack.
158                        Self::cleanup_stack(&mut parent_stack);
159                    }
160                    return Ok(Self {
161                        reader,
162                        version,
163                        parent_stack,
164                        done,
165                    });
166                }
167            }
168        }
169
170        match reader.get_node(&current_node_key)? {
171            Node::Internal(_) => unreachable!("Should have reached the bottom of the tree."),
172            Node::Leaf(leaf_node) => {
173                if leaf_node.key_hash() < starting_key {
174                    Self::cleanup_stack(&mut parent_stack);
175                    if parent_stack.is_empty() {
176                        done = true;
177                    }
178                }
179            }
180            Node::Null => done = true,
181        }
182
183        Ok(Self {
184            reader,
185            version,
186            parent_stack,
187            done,
188        })
189    }
190
191    fn cleanup_stack(parent_stack: &mut Vec<NodeVisitInfo>) {
192        while let Some(info) = parent_stack.last_mut() {
193            if info.is_rightmost() {
194                parent_stack.pop();
195            } else {
196                info.advance();
197                break;
198            }
199        }
200    }
201
202    /// Constructs a new iterator. This puts the internal state in the correct position, so the
203    /// following `next` call will yield the leaf at `start_idx`.
204    pub fn new_by_index(reader: Arc<R>, version: Version, start_idx: usize) -> Result<Self> {
205        let mut parent_stack = Vec::new();
206
207        let mut current_node_key = NodeKey::new_empty_path(version);
208        let mut current_node = reader.get_node(&current_node_key)?;
209        let total_leaves = current_node.leaf_count();
210        if start_idx >= total_leaves {
211            return Ok(Self {
212                reader,
213                version,
214                parent_stack,
215                done: true,
216            });
217        }
218
219        let mut leaves_skipped = 0;
220        for _ in 0..=ROOT_NIBBLE_HEIGHT {
221            match current_node {
222                Node::Null => {
223                    unreachable!("The Node::Null case has already been covered before loop.")
224                }
225                Node::Leaf(_) => {
226                    ensure!(
227                        leaves_skipped == start_idx,
228                        "Bug: The leaf should be the exact one we are looking for.",
229                    );
230                    return Ok(Self {
231                        reader,
232                        version,
233                        parent_stack,
234                        done: false,
235                    });
236                }
237                Node::Internal(internal_node) => {
238                    let (nibble, child) =
239                        Self::skip_leaves(&internal_node, &mut leaves_skipped, start_idx)?;
240                    let next_node_key = current_node_key.gen_child_node_key(child.version, nibble);
241                    parent_stack.push(NodeVisitInfo::new_next_child_to_visit(
242                        current_node_key,
243                        internal_node,
244                        nibble,
245                    ));
246                    current_node_key = next_node_key;
247                }
248            };
249            current_node = reader.get_node(&current_node_key)?;
250        }
251
252        bail!("Bug: potential infinite loop.");
253    }
254
255    fn skip_leaves<'a>(
256        internal_node: &'a InternalNode,
257        leaves_skipped: &mut usize,
258        target_leaf_idx: usize,
259    ) -> Result<(Nibble, &'a Child)> {
260        for (nibble, child) in internal_node.children_sorted() {
261            let child_leaf_count = child.leaf_count();
262            // n.b. The index is 0-based, so to reach leaf at N, N previous ones need to be skipped.
263            if *leaves_skipped + child_leaf_count <= target_leaf_idx {
264                *leaves_skipped += child_leaf_count;
265            } else {
266                return Ok((nibble, child));
267            }
268        }
269
270        bail!("Bug: Internal node has less leaves than expected.");
271    }
272}
273
274impl<R> Iterator for JellyfishMerkleIterator<R>
275where
276    R: TreeReader,
277{
278    type Item = Result<(KeyHash, OwnedValue)>;
279
280    fn next(&mut self) -> Option<Self::Item> {
281        if self.done {
282            return None;
283        }
284
285        if self.parent_stack.is_empty() {
286            let root_node_key = NodeKey::new_empty_path(self.version);
287            match self.reader.get_node(&root_node_key) {
288                Ok(Node::Leaf(leaf_node)) => {
289                    // This means the entire tree has a single leaf node. The key of this leaf node
290                    // is greater or equal to `starting_key` (otherwise we would have set `done` to
291                    // true in `new`). Return the node and mark `self.done` so next time we return
292                    // None.
293                    self.done = true;
294                    return match self
295                        .reader
296                        .get_value(root_node_key.version(), leaf_node.key_hash())
297                    {
298                        Ok(value) => Some(Ok((leaf_node.key_hash(), value))),
299                        Err(e) => Some(Err(e)),
300                    };
301                }
302                Ok(Node::Internal(_)) => {
303                    // This means `starting_key` is bigger than every key in this tree, or we have
304                    // iterated past the last key.
305                    return None;
306                }
307                Ok(Node::Null) => unreachable!("We would have set done to true in new."),
308                Err(err) => return Some(Err(err)),
309            }
310        }
311
312        loop {
313            let last_visited_node_info = self
314                .parent_stack
315                .last()
316                .expect("We have checked that self.parent_stack is not empty.");
317            let child_index =
318                Nibble::from(last_visited_node_info.next_child_to_visit.trailing_zeros() as u8);
319            let node_key = last_visited_node_info.node_key.gen_child_node_key(
320                last_visited_node_info
321                    .node
322                    .child(child_index)
323                    .expect("Child should exist.")
324                    .version,
325                child_index,
326            );
327            match self.reader.get_node(&node_key) {
328                Ok(Node::Internal(internal_node)) => {
329                    let visit_info = NodeVisitInfo::new(node_key, internal_node);
330                    self.parent_stack.push(visit_info);
331                }
332                Ok(Node::Leaf(leaf_node)) => {
333                    return match self
334                        .reader
335                        .get_value(node_key.version(), leaf_node.key_hash())
336                    {
337                        Ok(value) => {
338                            let ret = (leaf_node.key_hash(), value);
339                            Self::cleanup_stack(&mut self.parent_stack);
340                            Some(Ok(ret))
341                        }
342                        Err(e) => Some(Err(e)),
343                    }
344                }
345                Ok(Node::Null) => return Some(Err(format_err!("Should not reach a null node."))),
346                Err(err) => return Some(Err(err)),
347            }
348        }
349    }
350}