jmt/lib.rs
1// Copyright (c) The Diem Core Contributors
2// SPDX-License-Identifier: Apache-2.0
3#![cfg_attr(not(feature = "std"), no_std)]
4#![forbid(unsafe_code)]
5
6//! This module implements [`JellyfishMerkleTree`] backed by storage module. The tree itself doesn't
7//! persist anything, but realizes the logic of R/W only. The write path will produce all the
8//! intermediate results in a batch for storage layer to commit and the read path will return
9//! results directly. The public APIs are only [`new`], [`put_value_sets`], [`put_value_set`] and
10//! [`get_with_proof`]. After each put with a `value_set` based on a known version, the tree will
11//! return a new root hash with a [`TreeUpdateBatch`] containing all the new nodes and indices of
12//! stale nodes.
13//!
14//! A Jellyfish Merkle Tree itself logically is a 256-bit sparse Merkle tree with an optimization
15//! that any subtree containing 0 or 1 leaf node will be replaced by that leaf node or a placeholder
16//! node with default hash value. With this optimization we can save CPU by avoiding hashing on
17//! many sparse levels in the tree. Physically, the tree is structurally similar to the modified
18//! Patricia Merkle tree of Ethereum but with some modifications. A standard Jellyfish Merkle tree
19//! will look like the following figure:
20//!
21//! ```text
22//! .──────────────────────.
23//! _.─────' `──────.
24//! _.──' `───.
25//! _.─' `──.
26//! _.─' `──.
27//! ,' `.
28//! ,─' '─.
29//! ,' `.
30//! ,' `.
31//! ╱ ╲
32//! ╱ ╲
33//! ╱ ╲
34//! ╱ ╲
35//! ; :
36//! ; :
37//! ; :
38//! │ │
39//! +──────────────────────────────────────────────────────────────────────────────────────────────+
40//! .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''. .''.
41//! / \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \/ \
42//! +----++----++----++----++----++----++----++----++----++----++----++----++----++----++----++----+
43//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
44//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
45//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
46//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
47//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
48//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
49//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
50//! ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) ) )
51//! ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( (
52//! ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■ ■
53//!
54//! ■: the [`Value`] type this tree stores.
55//! ```
56//!
57//! A Jellyfish Merkle Tree consists of [`InternalNode`] and [`LeafNode`]. [`InternalNode`] is like
58//! branch node in ethereum patricia merkle with 16 children to represent a 4-level binary tree and
59//! [`LeafNode`] is similar to that in patricia merkle too. In the above figure, each `bell` in the
60//! jellyfish is an [`InternalNode`] while each tentacle is a [`LeafNode`]. It is noted that
61//! Jellyfish merkle doesn't have a counterpart for `extension` node of ethereum patricia merkle.
62//!
63//! [`JellyfishMerkleTree`]: struct.JellyfishMerkleTree.html
64//! [`new`]: struct.JellyfishMerkleTree.html#method.new
65//! [`put_value_sets`]: struct.JellyfishMerkleTree.html#method.put_value_sets
66//! [`put_value_set`]: struct.JellyfishMerkleTree.html#method.put_value_set
67//! [`get_with_proof`]: struct.JellyfishMerkleTree.html#method.get_with_proof
68//! [`TreeUpdateBatch`]: struct.TreeUpdateBatch.html
69//! [`InternalNode`]: node_type/struct.InternalNode.html
70//! [`LeafNode`]: node_type/struct.LeafNode.html
71
72extern crate alloc;
73
74use core::fmt::Debug;
75
76use digest::generic_array::GenericArray;
77use digest::Digest;
78use digest::OutputSizeUser;
79use serde::{Deserialize, Serialize};
80#[cfg(feature = "std")]
81use thiserror::Error;
82
83mod bytes32ext;
84mod iterator;
85mod node_type;
86mod reader;
87mod tree;
88mod tree_cache;
89mod types;
90mod writer;
91
92#[cfg(any(test, feature = "mocks"))]
93pub mod mock;
94pub mod restore;
95
96use bytes32ext::Bytes32Ext;
97pub use iterator::JellyfishMerkleIterator;
98#[cfg(feature = "ics23")]
99pub use tree::ics23_impl::ics23_spec;
100pub use tree::JellyfishMerkleTree;
101#[cfg(any(test, feature = "sha2"))]
102pub use tree::Sha256Jmt;
103
104use types::nibble::ROOT_NIBBLE_HEIGHT;
105pub use types::proof;
106pub use types::Version;
107
108/// Contains types used to bridge a [`JellyfishMerkleTree`](crate::JellyfishMerkleTree)
109/// to the backing storage recording the tree's internal data.
110pub mod storage {
111 pub use node_type::{LeafNode, Node, NodeKey};
112 pub use reader::HasPreimage;
113 pub use reader::TreeReader;
114 pub use types::nibble::nibble_path::NibblePath;
115 pub use writer::{
116 NodeBatch, NodeStats, StaleNodeIndex, StaleNodeIndexBatch, TreeUpdateBatch, TreeWriter,
117 };
118
119 use super::*;
120}
121
122#[cfg(any(test))]
123mod tests;
124
125/// An error that occurs when the state root for a requested version is missing (e.g., because it was pruned).
126#[derive(Debug)]
127#[cfg_attr(feature = "std", derive(Error))]
128#[cfg_attr(
129 feature = "std",
130 error("Missing state root node at version {version}, probably pruned.")
131)]
132pub struct MissingRootError {
133 pub version: Version,
134}
135
136#[cfg(not(feature = "std"))]
137impl core::fmt::Display for MissingRootError {
138 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
139 write!(
140 f,
141 "Missing state root node at version {}, probably pruned.",
142 self.version
143 )
144 }
145}
146
147// TODO: reorg
148
149const SPARSE_MERKLE_PLACEHOLDER_HASH: [u8; 32] = *b"SPARSE_MERKLE_PLACEHOLDER_HASH__";
150
151/// An owned value stored in the [`JellyfishMerkleTree`].
152pub type OwnedValue = alloc::vec::Vec<u8>;
153
154#[cfg(any(test))]
155use proptest_derive::Arbitrary;
156
157/// A root of a [`JellyfishMerkleTree`].
158#[derive(
159 Copy,
160 Clone,
161 PartialEq,
162 Eq,
163 PartialOrd,
164 Ord,
165 Hash,
166 Serialize,
167 Deserialize,
168 borsh::BorshSerialize,
169 borsh::BorshDeserialize,
170)]
171#[cfg_attr(any(test), derive(Arbitrary))]
172pub struct RootHash(pub [u8; 32]);
173
174impl From<RootHash> for [u8; 32] {
175 fn from(value: RootHash) -> Self {
176 value.0
177 }
178}
179
180impl From<[u8; 32]> for RootHash {
181 fn from(value: [u8; 32]) -> Self {
182 Self(value)
183 }
184}
185
186impl AsRef<[u8]> for RootHash {
187 fn as_ref(&self) -> &[u8] {
188 &self.0
189 }
190}
191
192/// A hashed key used to index a [`JellyfishMerkleTree`].
193///
194/// # 🚨 Danger 🚨
195/// ics23 non-existence proofs require that all key preimages are non-empty. If you
196/// plan to use ics23 non-existence proofs, you must ensure that keys are non-empty
197/// before creating `KeyHash`es.
198///
199/// The [`JellyfishMerkleTree`] only stores key hashes, not full keys.
200#[derive(
201 Copy,
202 Clone,
203 PartialEq,
204 Eq,
205 PartialOrd,
206 Ord,
207 Hash,
208 Serialize,
209 Deserialize,
210 borsh::BorshSerialize,
211 borsh::BorshDeserialize,
212)]
213#[cfg_attr(any(test), derive(Arbitrary))]
214pub struct KeyHash(pub [u8; 32]);
215
216#[derive(
217 Copy,
218 Clone,
219 PartialEq,
220 Eq,
221 PartialOrd,
222 Ord,
223 Hash,
224 Serialize,
225 Deserialize,
226 borsh::BorshSerialize,
227 borsh::BorshDeserialize,
228)]
229#[cfg_attr(any(test), derive(Arbitrary))]
230// This needs to be public for the fuzzing/Arbitrary feature, but we don't
231// really want it to be, so #[doc(hidden)] is the next best thing.
232#[doc(hidden)]
233pub struct ValueHash(pub [u8; 32]);
234
235impl ValueHash {
236 pub fn with<H: SimpleHasher>(value: impl AsRef<[u8]>) -> Self {
237 Self(H::hash(value))
238 }
239}
240
241impl KeyHash {
242 /// Hash the provided key with the provided hasher and return a new `KeyHash`.
243 ///
244 /// # 🚨 Danger 🚨
245 /// If you will use ics23 non-existence proofs,
246 /// you must ensure that the key is non-empty before calling this function.
247 pub fn with<H: SimpleHasher>(key: impl AsRef<[u8]>) -> Self {
248 let key_hash = Self(H::hash(key.as_ref()));
249 // Adding a tracing event here allows cross-referencing the key hash
250 // with the original key bytes when looking through logs.
251 tracing::debug!(key = ?EscapedByteSlice(key.as_ref()), ?key_hash, "hashed jmt key");
252 key_hash
253 }
254}
255
256impl core::fmt::Debug for KeyHash {
257 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
258 f.debug_tuple("KeyHash")
259 .field(&hex::encode(self.0))
260 .finish()
261 }
262}
263
264impl core::fmt::Debug for ValueHash {
265 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
266 f.debug_tuple("ValueHash")
267 .field(&hex::encode(self.0))
268 .finish()
269 }
270}
271
272impl core::fmt::Debug for RootHash {
273 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
274 f.debug_tuple("RootHash")
275 .field(&hex::encode(self.0))
276 .finish()
277 }
278}
279
280struct EscapedByteSlice<'a>(&'a [u8]);
281
282impl<'a> core::fmt::Debug for EscapedByteSlice<'a> {
283 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
284 write!(f, "b\"")?;
285 for &b in self.0 {
286 // https://doc.rust-lang.org/reference/tokens.html#byte-escapes
287 #[allow(clippy::manual_range_contains)]
288 if b == b'\n' {
289 write!(f, "\\n")?;
290 } else if b == b'\r' {
291 write!(f, "\\r")?;
292 } else if b == b'\t' {
293 write!(f, "\\t")?;
294 } else if b == b'\\' || b == b'"' {
295 write!(f, "\\{}", b as char)?;
296 } else if b == b'\0' {
297 write!(f, "\\0")?;
298 // ASCII printable
299 } else if b >= 0x20 && b < 0x7f {
300 write!(f, "{}", b as char)?;
301 } else {
302 write!(f, "\\x{:02x}", b)?;
303 }
304 }
305 write!(f, "\"")?;
306 Ok(())
307 }
308}
309
310/// A minimal trait representing a hash function. We implement our own
311/// rather than relying on `Digest` for broader compatibility.
312pub trait SimpleHasher: Sized {
313 /// Creates a new hasher with default state.
314 fn new() -> Self;
315 /// Ingests the provided data, updating the hasher's state.
316 fn update(&mut self, data: &[u8]);
317 /// Consumes the hasher state to produce a digest.
318 fn finalize(self) -> [u8; 32];
319 /// Returns the digest of the provided data.
320 fn hash(data: impl AsRef<[u8]>) -> [u8; 32] {
321 let mut hasher = Self::new();
322 hasher.update(data.as_ref());
323 hasher.finalize()
324 }
325}
326
327impl<T: Digest> SimpleHasher for T
328where
329 [u8; 32]: From<GenericArray<u8, <T as OutputSizeUser>::OutputSize>>,
330{
331 fn new() -> Self {
332 <T as Digest>::new()
333 }
334
335 fn update(&mut self, data: &[u8]) {
336 self.update(data)
337 }
338
339 fn finalize(self) -> [u8; 32] {
340 self.finalize().into()
341 }
342}
343
344/// A trivial implementation of [`SimpleHasher`] that simply returns the first 32 bytes of the
345/// provided data. This is useful to avoid hashing data when testing, and facilitate debugging
346/// specific tree configurations.
347pub struct TransparentHasher {
348 key: [u8; 32],
349}
350
351impl SimpleHasher for TransparentHasher {
352 fn new() -> Self {
353 TransparentHasher { key: [0u8; 32] }
354 }
355
356 fn update(&mut self, data: &[u8]) {
357 for (dest, &src) in self.key.iter_mut().zip(data.iter()) {
358 *dest = src;
359 }
360 }
361 fn finalize(self) -> [u8; 32] {
362 self.key
363 }
364}