jmt/
tree_cache.rs

1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! A transaction can have multiple operations on state. For example, it might update values
5//! for a few existing keys. Imagine that we have the following tree.
6//!
7//! ```text
8//!                 root0
9//!                 /    \
10//!                /      \
11//!  key1 => value11        key2 => value21
12//! ```
13//!
14//! The next transaction updates `key1`'s value to `value12` and `key2`'s value to `value22`.
15//! Let's assume we update key2 first. Then the tree becomes:
16//!
17//! ```text
18//!                   (on disk)              (in memory)
19//!                     root0                  root1'
20//!                    /     \                /     \
21//!                   /   ___ \ _____________/       \
22//!                  /  _/     \                      \
23//!                 / _/        \                      \
24//!                / /           \                      \
25//!   key1 => value11           key2 => value21       key2 => value22
26//!      (on disk)                 (on disk)            (in memory)
27//! ```
28//!
29//! Note that
30//!   1) we created a new version of the tree with `root1'` and the new `key2` node generated;
31//!   2) both `root1'` and the new `key2` node are still held in memory within a batch of nodes
32//!      that will be written into db atomically.
33//!
34//! Next, we need to update `key1`'s value. This time we are dealing with the tree starting from
35//! the new root. Part of the tree is in memory and the rest of it is in database. We'll update the
36//! left child and the new root. We should
37//!   1) create a new version for `key1` child.
38//!   2) update `root1'` directly instead of making another version.
39//! The resulting tree should look like:
40//!
41//! ```text
42//!                   (on disk)                                     (in memory)
43//!                     root0                                         root1''
44//!                    /     \                                       /     \
45//!                   /       \                                     /       \
46//!                  /         \                                   /         \
47//!                 /           \                                 /           \
48//!                /             \                               /             \
49//!   key1 => value11             key2 => value21  key1 => value12              key2 => value22
50//!      (on disk)                   (on disk)       (in memory)                  (in memory)
51//! ```
52//!
53//! This means that we need to be able to tell whether to create a new version of a node or to
54//! update an existing node by deleting it and creating a new node directly. `TreeCache` provides
55//! APIs to cache intermediate nodes and values in memory and simplify the actual tree
56//! implementation.
57//!
58//! If we are dealing with a single-version tree, any complex tree operation can be seen as a
59//! collection of the following operations:
60//!   - Put a new node.
61//!   - Delete a node.
62//! When we apply these operations on a multi-version tree:
63//!   1) Put a new node.
64//!   2) When we remove a node, if the node is in the previous on-disk version, we don't need to do
65//!      anything. Otherwise we delete it from the tree cache.
66//! Updating node could be operated as deletion of the node followed by insertion of the updated
67//! node.
68
69use alloc::{collections::BTreeSet, vec::Vec};
70#[cfg(not(feature = "std"))]
71use hashbrown::{hash_map::Entry, HashMap, HashSet};
72#[cfg(feature = "std")]
73use std::collections::{hash_map::Entry, HashMap, HashSet};
74
75use anyhow::{bail, Result};
76
77use crate::{
78    node_type::{Node, NodeKey},
79    storage::{
80        NodeBatch, NodeStats, StaleNodeIndex, StaleNodeIndexBatch, TreeReader, TreeUpdateBatch,
81    },
82    types::{Version, PRE_GENESIS_VERSION},
83    KeyHash, OwnedValue, RootHash, SimpleHasher,
84};
85
86/// `FrozenTreeCache` is used as a field of `TreeCache` storing all the nodes and values that
87/// are generated by earlier transactions so they have to be immutable. The motivation of
88/// `FrozenTreeCache` is to let `TreeCache` freeze intermediate results from each transaction to
89/// help commit more than one transaction in a row atomically.
90struct FrozenTreeCache {
91    /// Immutable node_cache.
92    node_cache: NodeBatch,
93
94    /// Immutable stale_node_index_cache.
95    stale_node_index_cache: StaleNodeIndexBatch,
96
97    /// the stats vector including the number of new nodes, new leaves, stale nodes and stale leaves.
98    node_stats: Vec<NodeStats>,
99
100    /// Frozen root hashes after each earlier transaction.
101    root_hashes: Vec<RootHash>,
102}
103
104impl FrozenTreeCache {
105    fn new() -> Self {
106        Self {
107            node_cache: Default::default(),
108            stale_node_index_cache: BTreeSet::new(),
109            node_stats: Vec::new(),
110            root_hashes: Vec::new(),
111        }
112    }
113}
114
115/// `TreeCache` is a in-memory cache for per-transaction updates of sparse Merkle nodes and values.
116pub struct TreeCache<'a, R> {
117    /// `NodeKey` of the current root node in cache.
118    root_node_key: NodeKey,
119
120    /// The version of the transaction to which the upcoming `put`s will be related.
121    next_version: Version,
122
123    /// Intermediate nodes keyed by node hash.
124    node_cache: HashMap<NodeKey, Node>,
125
126    /// Values keyed by version and keyhash.
127    // TODO(@preston-evans98): Convert to a vector once we remove the non-batch APIs.
128    // The Hashmap guarantees that if the same (version, key) pair is written several times, only the last
129    // change is saved, which means that the TreeWriter can process node batches in parallel without racing.
130    // The batch APIs already deduplicate operations on each key, so they don't need this HashMap.
131    value_cache: HashMap<(Version, KeyHash), Option<OwnedValue>>,
132
133    /// # of leaves in the `node_cache`,
134    num_new_leaves: usize,
135
136    /// Partial stale log. `NodeKey` to identify the stale record.
137    stale_node_index_cache: HashSet<NodeKey>,
138
139    /// # of leaves in the `stale_node_index_cache`,
140    num_stale_leaves: usize,
141
142    /// The immutable part of this cache, which will be committed to the underlying storage.
143    frozen_cache: FrozenTreeCache,
144
145    /// The underlying persistent storage.
146    reader: &'a R,
147}
148
149impl<'a, R> TreeCache<'a, R>
150where
151    R: 'a + TreeReader,
152{
153    /// Constructs a new `TreeCache` instance.
154    pub fn new(reader: &'a R, next_version: Version) -> Result<Self> {
155        let mut node_cache = HashMap::new();
156        let root_node_key = if next_version == 0 {
157            let pre_genesis_root_key = NodeKey::new_empty_path(PRE_GENESIS_VERSION);
158            let pre_genesis_root = reader.get_node_option(&pre_genesis_root_key)?;
159
160            match pre_genesis_root {
161                Some(_) => {
162                    // This is to support the extreme case where things really went wild,
163                    // and we need to ditch the transaction history and apply a new
164                    // genesis on top of an existing state db.
165                    pre_genesis_root_key
166                }
167                None => {
168                    // Hack: We need to start from an empty tree, so we insert
169                    // a null node beforehand deliberately to deal with this corner case.
170                    let genesis_root_key = NodeKey::new_empty_path(0);
171                    node_cache.insert(genesis_root_key.clone(), Node::new_null());
172                    genesis_root_key
173                }
174            }
175        } else {
176            NodeKey::new_empty_path(next_version - 1)
177        };
178        Ok(Self {
179            node_cache,
180            stale_node_index_cache: HashSet::new(),
181            frozen_cache: FrozenTreeCache::new(),
182            root_node_key,
183            next_version,
184            reader,
185            num_stale_leaves: 0,
186            num_new_leaves: 0,
187            value_cache: Default::default(),
188        })
189    }
190
191    #[cfg(feature = "migration")]
192    /// Instantiate a [`TreeCache`] over the a [`TreeReader`] that is defined
193    /// against a root node key at version `current_version`.
194    ///
195    /// # Usage
196    /// This method is used to perform incremental addition to a tree without
197    /// increasing the tree version's number.
198    pub fn new_overwrite(reader: &'a R, current_version: Version) -> Result<Self> {
199        let node_cache = HashMap::new();
200        let Some((node_key, _)) = reader.get_rightmost_leaf()? else {
201            bail!("creating an overwrite cache for an empty tree is not supported")
202        };
203
204        anyhow::ensure!(
205            node_key.version() == current_version,
206            "the supplied version is not the latest version of the tree"
207        );
208
209        let root_node_key = NodeKey::new_empty_path(current_version);
210        Ok(Self {
211            node_cache,
212            stale_node_index_cache: HashSet::new(),
213            frozen_cache: FrozenTreeCache::new(),
214            root_node_key,
215            next_version: current_version,
216            reader,
217            num_stale_leaves: 0,
218            num_new_leaves: 0,
219            value_cache: Default::default(),
220        })
221    }
222
223    /// Gets a node with given node key. If it doesn't exist in node cache, read from `reader`.
224    pub fn get_node(&self, node_key: &NodeKey) -> Result<Node> {
225        Ok(if let Some(node) = self.node_cache.get(node_key) {
226            node.clone()
227        } else if let Some(node) = self.frozen_cache.node_cache.nodes().get(node_key) {
228            node.clone()
229        } else {
230            self.reader.get_node(node_key)?
231        })
232    }
233
234    /// Gets a node with the given node key. If it doesn't exist in node cache, read from `reader`
235    /// If it doesn't exist anywhere, return `None`.
236    pub fn get_node_option(&self, node_key: &NodeKey) -> Result<Option<Node>> {
237        Ok(if let Some(node) = self.node_cache.get(node_key) {
238            Some(node.clone())
239        } else if let Some(node) = self.frozen_cache.node_cache.nodes().get(node_key) {
240            Some(node.clone())
241        } else {
242            self.reader.get_node_option(node_key)?
243        })
244    }
245
246    /// Gets the current root node key.
247    pub fn get_root_node_key(&self) -> &NodeKey {
248        &self.root_node_key
249    }
250
251    /// Set roots `node_key`.
252    pub fn set_root_node_key(&mut self, root_node_key: NodeKey) {
253        self.root_node_key = root_node_key;
254    }
255
256    /// Puts the node with given hash as key into node_cache.
257    pub fn put_node(&mut self, node_key: NodeKey, new_node: Node) -> Result<()> {
258        match self.node_cache.entry(node_key) {
259            Entry::Vacant(o) => {
260                if new_node.is_leaf() {
261                    self.num_new_leaves += 1
262                }
263                o.insert(new_node);
264            }
265            Entry::Occupied(o) => bail!("Node with key {:?} already exists in NodeBatch", o.key()),
266        };
267        Ok(())
268    }
269
270    pub fn put_value(&mut self, version: Version, key_hash: KeyHash, value: Option<OwnedValue>) {
271        self.value_cache.insert((version, key_hash), value);
272    }
273
274    /// Deletes a node with given hash.
275    pub fn delete_node(&mut self, old_node_key: &NodeKey, is_leaf: bool) {
276        // If node cache doesn't have this node, it means the node is in the previous version of
277        // the tree on the disk.
278        if self.node_cache.remove(old_node_key).is_none() {
279            let is_new_entry = self.stale_node_index_cache.insert(old_node_key.clone());
280            assert!(is_new_entry, "Node gets stale twice unexpectedly.");
281            if is_leaf {
282                self.num_stale_leaves += 1;
283            }
284        } else if is_leaf {
285            self.num_new_leaves -= 1;
286        }
287    }
288
289    /// Freezes all the contents in cache to be immutable and clear `node_cache`.
290    pub fn freeze<H: SimpleHasher>(&mut self) -> Result<()> {
291        let mut root_node_key = self.get_root_node_key().clone();
292
293        let root_node = if let Some(root_node) = self.get_node_option(&root_node_key)? {
294            root_node
295        } else {
296            // If the root node does not exist, then we need to set it to the null node and record
297            // that node hash as the root hash of this version. This will happen if you delete as
298            // the first operation on an empty tree, but also if you manage to delete every single
299            // key-value mapping in the tree.
300            self.put_node(root_node_key.clone(), Node::new_null())?;
301            Node::Null
302        };
303
304        // Insert the root node's hash into the list of root hashes in the frozen cache, so that
305        // they can be extracted later after a sequence of transactions:
306        self.frozen_cache
307            .root_hashes
308            .push(RootHash(root_node.hash::<H>()));
309
310        // If the effect of this set of changes has been to do nothing, we still need to create a
311        // new root node that matches the anticipated version; we do this by copying the previous
312        // root node and incrementing the version. If we didn't do this, then any set of changes
313        // which failed to have an effect on the tree would mean that the *next* set of changes
314        // would be faced with a non-existent root node at the version it is expecting, since it's
315        // internally expected that the version increments every time the tree cache is frozen.
316        if self.next_version > 0
317            && self.node_cache.is_empty()
318            && self.stale_node_index_cache.is_empty()
319        {
320            let root_node = self.get_node(&self.root_node_key)?;
321            root_node_key.set_version(self.next_version);
322            self.put_node(root_node_key, root_node)?;
323        }
324
325        // Transfer all the state from this version of the cache into the immutable version of the
326        // cache, draining it and resetting it as we go:
327        let node_stats = NodeStats {
328            new_nodes: self.node_cache.len(),
329            new_leaves: self.num_new_leaves,
330            stale_nodes: self.stale_node_index_cache.len(),
331            stale_leaves: self.num_stale_leaves,
332        };
333        self.frozen_cache.node_stats.push(node_stats);
334        self.frozen_cache
335            .node_cache
336            .extend(self.node_cache.drain(), self.value_cache.drain());
337        let stale_since_version = self.next_version;
338        self.frozen_cache
339            .stale_node_index_cache
340            .extend(
341                self.stale_node_index_cache
342                    .drain()
343                    .map(|node_key| StaleNodeIndex {
344                        stale_since_version,
345                        node_key,
346                    }),
347            );
348
349        // Clean up
350        self.num_stale_leaves = 0;
351        self.num_new_leaves = 0;
352
353        // Prepare for the next version after freezing
354        self.next_version += 1;
355
356        Ok(())
357    }
358}
359
360impl<'a, R> TreeReader for TreeCache<'a, R>
361where
362    R: 'a + TreeReader,
363{
364    fn get_node_option(&self, node_key: &NodeKey) -> Result<Option<Node>> {
365        self.get_node_option(node_key)
366    }
367
368    fn get_value_option(
369        &self,
370        max_version: Version,
371        key_hash: KeyHash,
372    ) -> Result<Option<OwnedValue>> {
373        for ((version, _hash), value) in self
374            .value_cache
375            .iter()
376            .filter(|((_version, hash), _value)| *hash == key_hash)
377        {
378            if *version <= max_version {
379                return Ok(value.clone());
380            }
381        }
382
383        self.reader.get_value_option(max_version, key_hash)
384    }
385
386    fn get_rightmost_leaf(&self) -> Result<Option<(NodeKey, crate::storage::LeafNode)>> {
387        unimplemented!("get_rightmost_leaf should not be used with a tree cache")
388    }
389}
390
391impl<'a, R> From<TreeCache<'a, R>> for (Vec<RootHash>, TreeUpdateBatch)
392where
393    R: 'a + TreeReader,
394{
395    fn from(tree_cache: TreeCache<'a, R>) -> Self {
396        (
397            tree_cache.frozen_cache.root_hashes,
398            TreeUpdateBatch {
399                node_batch: tree_cache.frozen_cache.node_cache,
400                stale_node_index_batch: tree_cache.frozen_cache.stale_node_index_cache,
401                node_stats: tree_cache.frozen_cache.node_stats,
402            },
403        )
404    }
405}