jmt/tree_cache.rs
1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! A transaction can have multiple operations on state. For example, it might update values
5//! for a few existing keys. Imagine that we have the following tree.
6//!
7//! ```text
8//! root0
9//! / \
10//! / \
11//! key1 => value11 key2 => value21
12//! ```
13//!
14//! The next transaction updates `key1`'s value to `value12` and `key2`'s value to `value22`.
15//! Let's assume we update key2 first. Then the tree becomes:
16//!
17//! ```text
18//! (on disk) (in memory)
19//! root0 root1'
20//! / \ / \
21//! / ___ \ _____________/ \
22//! / _/ \ \
23//! / _/ \ \
24//! / / \ \
25//! key1 => value11 key2 => value21 key2 => value22
26//! (on disk) (on disk) (in memory)
27//! ```
28//!
29//! Note that
30//! 1) we created a new version of the tree with `root1'` and the new `key2` node generated;
31//! 2) both `root1'` and the new `key2` node are still held in memory within a batch of nodes
32//! that will be written into db atomically.
33//!
34//! Next, we need to update `key1`'s value. This time we are dealing with the tree starting from
35//! the new root. Part of the tree is in memory and the rest of it is in database. We'll update the
36//! left child and the new root. We should
37//! 1) create a new version for `key1` child.
38//! 2) update `root1'` directly instead of making another version.
39//! The resulting tree should look like:
40//!
41//! ```text
42//! (on disk) (in memory)
43//! root0 root1''
44//! / \ / \
45//! / \ / \
46//! / \ / \
47//! / \ / \
48//! / \ / \
49//! key1 => value11 key2 => value21 key1 => value12 key2 => value22
50//! (on disk) (on disk) (in memory) (in memory)
51//! ```
52//!
53//! This means that we need to be able to tell whether to create a new version of a node or to
54//! update an existing node by deleting it and creating a new node directly. `TreeCache` provides
55//! APIs to cache intermediate nodes and values in memory and simplify the actual tree
56//! implementation.
57//!
58//! If we are dealing with a single-version tree, any complex tree operation can be seen as a
59//! collection of the following operations:
60//! - Put a new node.
61//! - Delete a node.
62//! When we apply these operations on a multi-version tree:
63//! 1) Put a new node.
64//! 2) When we remove a node, if the node is in the previous on-disk version, we don't need to do
65//! anything. Otherwise we delete it from the tree cache.
66//! Updating node could be operated as deletion of the node followed by insertion of the updated
67//! node.
68
69use alloc::{collections::BTreeSet, vec::Vec};
70#[cfg(not(feature = "std"))]
71use hashbrown::{hash_map::Entry, HashMap, HashSet};
72#[cfg(feature = "std")]
73use std::collections::{hash_map::Entry, HashMap, HashSet};
74
75use anyhow::{bail, Result};
76
77use crate::{
78 node_type::{Node, NodeKey},
79 storage::{
80 NodeBatch, NodeStats, StaleNodeIndex, StaleNodeIndexBatch, TreeReader, TreeUpdateBatch,
81 },
82 types::{Version, PRE_GENESIS_VERSION},
83 KeyHash, OwnedValue, RootHash, SimpleHasher,
84};
85
86/// `FrozenTreeCache` is used as a field of `TreeCache` storing all the nodes and values that
87/// are generated by earlier transactions so they have to be immutable. The motivation of
88/// `FrozenTreeCache` is to let `TreeCache` freeze intermediate results from each transaction to
89/// help commit more than one transaction in a row atomically.
90struct FrozenTreeCache {
91 /// Immutable node_cache.
92 node_cache: NodeBatch,
93
94 /// Immutable stale_node_index_cache.
95 stale_node_index_cache: StaleNodeIndexBatch,
96
97 /// the stats vector including the number of new nodes, new leaves, stale nodes and stale leaves.
98 node_stats: Vec<NodeStats>,
99
100 /// Frozen root hashes after each earlier transaction.
101 root_hashes: Vec<RootHash>,
102}
103
104impl FrozenTreeCache {
105 fn new() -> Self {
106 Self {
107 node_cache: Default::default(),
108 stale_node_index_cache: BTreeSet::new(),
109 node_stats: Vec::new(),
110 root_hashes: Vec::new(),
111 }
112 }
113}
114
115/// `TreeCache` is a in-memory cache for per-transaction updates of sparse Merkle nodes and values.
116pub struct TreeCache<'a, R> {
117 /// `NodeKey` of the current root node in cache.
118 root_node_key: NodeKey,
119
120 /// The version of the transaction to which the upcoming `put`s will be related.
121 next_version: Version,
122
123 /// Intermediate nodes keyed by node hash.
124 node_cache: HashMap<NodeKey, Node>,
125
126 /// Values keyed by version and keyhash.
127 // TODO(@preston-evans98): Convert to a vector once we remove the non-batch APIs.
128 // The Hashmap guarantees that if the same (version, key) pair is written several times, only the last
129 // change is saved, which means that the TreeWriter can process node batches in parallel without racing.
130 // The batch APIs already deduplicate operations on each key, so they don't need this HashMap.
131 value_cache: HashMap<(Version, KeyHash), Option<OwnedValue>>,
132
133 /// # of leaves in the `node_cache`,
134 num_new_leaves: usize,
135
136 /// Partial stale log. `NodeKey` to identify the stale record.
137 stale_node_index_cache: HashSet<NodeKey>,
138
139 /// # of leaves in the `stale_node_index_cache`,
140 num_stale_leaves: usize,
141
142 /// The immutable part of this cache, which will be committed to the underlying storage.
143 frozen_cache: FrozenTreeCache,
144
145 /// The underlying persistent storage.
146 reader: &'a R,
147}
148
149impl<'a, R> TreeCache<'a, R>
150where
151 R: 'a + TreeReader,
152{
153 /// Constructs a new `TreeCache` instance.
154 pub fn new(reader: &'a R, next_version: Version) -> Result<Self> {
155 let mut node_cache = HashMap::new();
156 let root_node_key = if next_version == 0 {
157 let pre_genesis_root_key = NodeKey::new_empty_path(PRE_GENESIS_VERSION);
158 let pre_genesis_root = reader.get_node_option(&pre_genesis_root_key)?;
159
160 match pre_genesis_root {
161 Some(_) => {
162 // This is to support the extreme case where things really went wild,
163 // and we need to ditch the transaction history and apply a new
164 // genesis on top of an existing state db.
165 pre_genesis_root_key
166 }
167 None => {
168 // Hack: We need to start from an empty tree, so we insert
169 // a null node beforehand deliberately to deal with this corner case.
170 let genesis_root_key = NodeKey::new_empty_path(0);
171 node_cache.insert(genesis_root_key.clone(), Node::new_null());
172 genesis_root_key
173 }
174 }
175 } else {
176 NodeKey::new_empty_path(next_version - 1)
177 };
178 Ok(Self {
179 node_cache,
180 stale_node_index_cache: HashSet::new(),
181 frozen_cache: FrozenTreeCache::new(),
182 root_node_key,
183 next_version,
184 reader,
185 num_stale_leaves: 0,
186 num_new_leaves: 0,
187 value_cache: Default::default(),
188 })
189 }
190
191 #[cfg(feature = "migration")]
192 /// Instantiate a [`TreeCache`] over the a [`TreeReader`] that is defined
193 /// against a root node key at version `current_version`.
194 ///
195 /// # Usage
196 /// This method is used to perform incremental addition to a tree without
197 /// increasing the tree version's number.
198 pub fn new_overwrite(reader: &'a R, current_version: Version) -> Result<Self> {
199 let node_cache = HashMap::new();
200 let Some((node_key, _)) = reader.get_rightmost_leaf()? else {
201 bail!("creating an overwrite cache for an empty tree is not supported")
202 };
203
204 anyhow::ensure!(
205 node_key.version() == current_version,
206 "the supplied version is not the latest version of the tree"
207 );
208
209 let root_node_key = NodeKey::new_empty_path(current_version);
210 Ok(Self {
211 node_cache,
212 stale_node_index_cache: HashSet::new(),
213 frozen_cache: FrozenTreeCache::new(),
214 root_node_key,
215 next_version: current_version,
216 reader,
217 num_stale_leaves: 0,
218 num_new_leaves: 0,
219 value_cache: Default::default(),
220 })
221 }
222
223 /// Gets a node with given node key. If it doesn't exist in node cache, read from `reader`.
224 pub fn get_node(&self, node_key: &NodeKey) -> Result<Node> {
225 Ok(if let Some(node) = self.node_cache.get(node_key) {
226 node.clone()
227 } else if let Some(node) = self.frozen_cache.node_cache.nodes().get(node_key) {
228 node.clone()
229 } else {
230 self.reader.get_node(node_key)?
231 })
232 }
233
234 /// Gets a node with the given node key. If it doesn't exist in node cache, read from `reader`
235 /// If it doesn't exist anywhere, return `None`.
236 pub fn get_node_option(&self, node_key: &NodeKey) -> Result<Option<Node>> {
237 Ok(if let Some(node) = self.node_cache.get(node_key) {
238 Some(node.clone())
239 } else if let Some(node) = self.frozen_cache.node_cache.nodes().get(node_key) {
240 Some(node.clone())
241 } else {
242 self.reader.get_node_option(node_key)?
243 })
244 }
245
246 /// Gets the current root node key.
247 pub fn get_root_node_key(&self) -> &NodeKey {
248 &self.root_node_key
249 }
250
251 /// Set roots `node_key`.
252 pub fn set_root_node_key(&mut self, root_node_key: NodeKey) {
253 self.root_node_key = root_node_key;
254 }
255
256 /// Puts the node with given hash as key into node_cache.
257 pub fn put_node(&mut self, node_key: NodeKey, new_node: Node) -> Result<()> {
258 match self.node_cache.entry(node_key) {
259 Entry::Vacant(o) => {
260 if new_node.is_leaf() {
261 self.num_new_leaves += 1
262 }
263 o.insert(new_node);
264 }
265 Entry::Occupied(o) => bail!("Node with key {:?} already exists in NodeBatch", o.key()),
266 };
267 Ok(())
268 }
269
270 pub fn put_value(&mut self, version: Version, key_hash: KeyHash, value: Option<OwnedValue>) {
271 self.value_cache.insert((version, key_hash), value);
272 }
273
274 /// Deletes a node with given hash.
275 pub fn delete_node(&mut self, old_node_key: &NodeKey, is_leaf: bool) {
276 // If node cache doesn't have this node, it means the node is in the previous version of
277 // the tree on the disk.
278 if self.node_cache.remove(old_node_key).is_none() {
279 let is_new_entry = self.stale_node_index_cache.insert(old_node_key.clone());
280 assert!(is_new_entry, "Node gets stale twice unexpectedly.");
281 if is_leaf {
282 self.num_stale_leaves += 1;
283 }
284 } else if is_leaf {
285 self.num_new_leaves -= 1;
286 }
287 }
288
289 /// Freezes all the contents in cache to be immutable and clear `node_cache`.
290 pub fn freeze<H: SimpleHasher>(&mut self) -> Result<()> {
291 let mut root_node_key = self.get_root_node_key().clone();
292
293 let root_node = if let Some(root_node) = self.get_node_option(&root_node_key)? {
294 root_node
295 } else {
296 // If the root node does not exist, then we need to set it to the null node and record
297 // that node hash as the root hash of this version. This will happen if you delete as
298 // the first operation on an empty tree, but also if you manage to delete every single
299 // key-value mapping in the tree.
300 self.put_node(root_node_key.clone(), Node::new_null())?;
301 Node::Null
302 };
303
304 // Insert the root node's hash into the list of root hashes in the frozen cache, so that
305 // they can be extracted later after a sequence of transactions:
306 self.frozen_cache
307 .root_hashes
308 .push(RootHash(root_node.hash::<H>()));
309
310 // If the effect of this set of changes has been to do nothing, we still need to create a
311 // new root node that matches the anticipated version; we do this by copying the previous
312 // root node and incrementing the version. If we didn't do this, then any set of changes
313 // which failed to have an effect on the tree would mean that the *next* set of changes
314 // would be faced with a non-existent root node at the version it is expecting, since it's
315 // internally expected that the version increments every time the tree cache is frozen.
316 if self.next_version > 0
317 && self.node_cache.is_empty()
318 && self.stale_node_index_cache.is_empty()
319 {
320 let root_node = self.get_node(&self.root_node_key)?;
321 root_node_key.set_version(self.next_version);
322 self.put_node(root_node_key, root_node)?;
323 }
324
325 // Transfer all the state from this version of the cache into the immutable version of the
326 // cache, draining it and resetting it as we go:
327 let node_stats = NodeStats {
328 new_nodes: self.node_cache.len(),
329 new_leaves: self.num_new_leaves,
330 stale_nodes: self.stale_node_index_cache.len(),
331 stale_leaves: self.num_stale_leaves,
332 };
333 self.frozen_cache.node_stats.push(node_stats);
334 self.frozen_cache
335 .node_cache
336 .extend(self.node_cache.drain(), self.value_cache.drain());
337 let stale_since_version = self.next_version;
338 self.frozen_cache
339 .stale_node_index_cache
340 .extend(
341 self.stale_node_index_cache
342 .drain()
343 .map(|node_key| StaleNodeIndex {
344 stale_since_version,
345 node_key,
346 }),
347 );
348
349 // Clean up
350 self.num_stale_leaves = 0;
351 self.num_new_leaves = 0;
352
353 // Prepare for the next version after freezing
354 self.next_version += 1;
355
356 Ok(())
357 }
358}
359
360impl<'a, R> TreeReader for TreeCache<'a, R>
361where
362 R: 'a + TreeReader,
363{
364 fn get_node_option(&self, node_key: &NodeKey) -> Result<Option<Node>> {
365 self.get_node_option(node_key)
366 }
367
368 fn get_value_option(
369 &self,
370 max_version: Version,
371 key_hash: KeyHash,
372 ) -> Result<Option<OwnedValue>> {
373 for ((version, _hash), value) in self
374 .value_cache
375 .iter()
376 .filter(|((_version, hash), _value)| *hash == key_hash)
377 {
378 if *version <= max_version {
379 return Ok(value.clone());
380 }
381 }
382
383 self.reader.get_value_option(max_version, key_hash)
384 }
385
386 fn get_rightmost_leaf(&self) -> Result<Option<(NodeKey, crate::storage::LeafNode)>> {
387 unimplemented!("get_rightmost_leaf should not be used with a tree cache")
388 }
389}
390
391impl<'a, R> From<TreeCache<'a, R>> for (Vec<RootHash>, TreeUpdateBatch)
392where
393 R: 'a + TreeReader,
394{
395 fn from(tree_cache: TreeCache<'a, R>) -> Self {
396 (
397 tree_cache.frozen_cache.root_hashes,
398 TreeUpdateBatch {
399 node_batch: tree_cache.frozen_cache.node_cache,
400 stale_node_index_batch: tree_cache.frozen_cache.stale_node_index_cache,
401 node_stats: tree_cache.frozen_cache.node_stats,
402 },
403 )
404 }
405}