Skip to content

Commit

Permalink
chore(trie): dedup node iters
Browse files Browse the repository at this point in the history
  • Loading branch information
rkrasiuk committed May 24, 2024
1 parent 0327007 commit 12cbd55
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 195 deletions.
25 changes: 13 additions & 12 deletions crates/trie-parallel/src/async_root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use reth_primitives::{
use reth_provider::{providers::ConsistentDbView, DatabaseProviderFactory, ProviderError};
use reth_tasks::pool::BlockingTaskPool;
use reth_trie::{
hashed_cursor::HashedPostStateCursorFactory,
node_iter::{AccountNode, AccountNodeIter},
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory},
node_iter::{TrieElement, TrieNodeIter},
trie_cursor::TrieCursorFactory,
updates::TrieUpdates,
walker::TrieWalker,
Expand Down Expand Up @@ -131,23 +131,24 @@ where
let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, &hashed_state_sorted);
let trie_cursor_factory = tx;

let trie_cursor =
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?;
let walker = TrieWalker::new(
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?,
prefix_sets.account_prefix_set,
)
.with_updates(retain_updates);
let mut account_node_iter = TrieNodeIter::new(
walker,
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?,
);

let mut hash_builder = HashBuilder::default().with_updates(retain_updates);
let walker = TrieWalker::new(trie_cursor, prefix_sets.account_prefix_set)
.with_updates(retain_updates);
let mut account_node_iter =
AccountNodeIter::from_factory(walker, hashed_cursor_factory.clone())
.map_err(ProviderError::Database)?;

let mut account_rlp = Vec::with_capacity(128);
while let Some(node) = account_node_iter.try_next().map_err(ProviderError::Database)? {
match node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
let (storage_root, _, updates) = match storage_roots.remove(&hashed_address) {
Some(rx) => rx.await.map_err(|_| {
AsyncStateRootError::StorageRootChannelClosed { hashed_address }
Expand Down
23 changes: 12 additions & 11 deletions crates/trie-parallel/src/parallel_root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use reth_primitives::{
use reth_provider::{providers::ConsistentDbView, DatabaseProviderFactory, ProviderError};
use reth_trie::{
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory},
node_iter::{AccountNode, AccountNodeIter},
node_iter::{TrieElement, TrieNodeIter},
trie_cursor::TrieCursorFactory,
updates::TrieUpdates,
walker::TrieWalker,
Expand Down Expand Up @@ -115,23 +115,24 @@ where
HashedPostStateCursorFactory::new(provider_ro.tx_ref(), &hashed_state_sorted);
let trie_cursor_factory = provider_ro.tx_ref();

let hashed_account_cursor =
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?;
let trie_cursor =
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?;
let walker = TrieWalker::new(
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?,
prefix_sets.account_prefix_set,
)
.with_updates(retain_updates);
let mut account_node_iter = TrieNodeIter::new(
walker,
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?,
);

let walker = TrieWalker::new(trie_cursor, prefix_sets.account_prefix_set)
.with_updates(retain_updates);
let mut account_node_iter = AccountNodeIter::new(walker, hashed_account_cursor);
let mut hash_builder = HashBuilder::default().with_updates(retain_updates);

let mut account_rlp = Vec::with_capacity(128);
while let Some(node) = account_node_iter.try_next().map_err(ProviderError::Database)? {
match node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
let (storage_root, _, updates) = match storage_roots.remove(&hashed_address) {
Some(result) => result,
// Since we do not store all intermediate nodes in the database, there might
Expand Down
2 changes: 1 addition & 1 deletion crates/trie/src/hashed_cursor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub trait HashedCursorFactory {
/// The cursor for iterating over hashed entries.
pub trait HashedCursor {
/// Value returned by the cursor.
type Value;
type Value: std::fmt::Debug;

/// Seek an entry greater or equal to the given key and position the cursor there.
fn seek(&mut self, key: B256) -> Result<Option<(B256, Self::Value)>, reth_db::DatabaseError>;
Expand Down
199 changes: 46 additions & 153 deletions crates/trie/src/node_iter.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
use crate::{
hashed_cursor::{HashedCursor, HashedCursorFactory, HashedStorageCursor},
trie_cursor::TrieCursor,
walker::TrieWalker,
};
use crate::{hashed_cursor::HashedCursor, trie_cursor::TrieCursor, walker::TrieWalker};
use reth_db::DatabaseError;
use reth_primitives::{trie::Nibbles, Account, B256, U256};
use reth_primitives::{trie::Nibbles, B256};

/// Represents a branch node in the trie.
#[derive(Debug)]
Expand All @@ -24,95 +20,80 @@ impl TrieBranchNode {
}
}

/// Represents a variant of an account node.
/// Represents variants of trie nodes returned by the iteration.
#[derive(Debug)]
pub enum AccountNode {
pub enum TrieElement<Value> {
/// Branch node.
Branch(TrieBranchNode),
/// Leaf node.
Leaf(B256, Account),
}

/// Represents a variant of a storage node.
#[derive(Debug)]
pub enum StorageNode {
/// Branch node.
Branch(TrieBranchNode),
/// Leaf node.
Leaf(B256, U256),
Leaf(B256, Value),
}

/// An iterator over existing intermediate branch nodes and updated leaf nodes.
#[derive(Debug)]
pub struct AccountNodeIter<C, H> {
/// Underlying walker over intermediate nodes.
pub struct TrieNodeIter<C, H: HashedCursor> {
/// The walker over intermediate nodes.
pub walker: TrieWalker<C>,
/// The cursor for the hashed account entries.
pub hashed_account_cursor: H,
/// The previous account key. If the iteration was previously interrupted, this value can be
/// The cursor for the hashed entries.
pub hashed_cursor: H,
/// The previous hashed key. If the iteration was previously interrupted, this value can be
/// used to resume iterating from the last returned leaf node.
previous_account_key: Option<B256>,
previous_hashed_key: Option<B256>,

/// Current hashed account entry.
current_hashed_entry: Option<(B256, Account)>,
/// Current hashed entry.
current_hashed_entry: Option<(B256, <H as HashedCursor>::Value)>,
/// Flag indicating whether we should check the current walker key.
current_walker_key_checked: bool,
}

impl<C, H> AccountNodeIter<C, H> {
/// Creates a new `AccountNodeIter`.
pub fn new(walker: TrieWalker<C>, hashed_account_cursor: H) -> Self {
impl<C, H: HashedCursor> TrieNodeIter<C, H> {
/// Creates a new [TrieNodeIter].
pub fn new(walker: TrieWalker<C>, hashed_cursor: H) -> Self {
Self {
walker,
hashed_account_cursor,
previous_account_key: None,
hashed_cursor,
previous_hashed_key: None,
current_hashed_entry: None,
current_walker_key_checked: false,
}
}

/// Create new `AccountNodeIter` by creating hashed account cursor from factory.
pub fn from_factory<F: HashedCursorFactory<AccountCursor = H>>(
walker: TrieWalker<C>,
factory: F,
) -> Result<Self, DatabaseError> {
Ok(Self::new(walker, factory.hashed_account_cursor()?))
}

/// Sets the last iterated account key and returns the modified `AccountNodeIter`.
/// Sets the last iterated hashed key and returns the modified [TrieNodeIter].
/// This is used to resume iteration from the last checkpoint.
pub fn with_last_account_key(mut self, previous_account_key: B256) -> Self {
self.previous_account_key = Some(previous_account_key);
pub fn with_last_hashed_key(mut self, previous_hashed_key: B256) -> Self {
self.previous_hashed_key = Some(previous_hashed_key);
self
}
}

impl<C, H> AccountNodeIter<C, H>
impl<C, H> TrieNodeIter<C, H>
where
C: TrieCursor,
H: HashedCursor<Value = Account>,
H: HashedCursor,
{
/// Return the next account trie node to be added to the hash builder.
/// Return the next trie node to be added to the hash builder.
///
/// Returns the nodes using this algorithm:
/// 1. Return the current intermediate branch node if it hasn't been updated.
/// 2. Advance the trie walker to the next intermediate branch node and retrieve next
/// unprocessed key.
/// 3. Reposition the hashed account cursor on the next unprocessed key.
/// 4. Return every hashed account entry up to the key of the current intermediate branch node.
/// 3. Reposition the hashed cursor on the next unprocessed key.
/// 4. Return every hashed entry up to the key of the current intermediate branch node.
/// 5. Repeat.
///
/// NOTE: The iteration will start from the key of the previous hashed entry if it was supplied.
pub fn try_next(&mut self) -> Result<Option<AccountNode>, DatabaseError> {
pub fn try_next(
&mut self,
) -> Result<Option<TrieElement<<H as HashedCursor>::Value>>, DatabaseError> {
loop {
// If the walker has a key...
if let Some(key) = self.walker.key() {
// Check if the current walker key is unchecked and there's no previous account key
if !self.current_walker_key_checked && self.previous_account_key.is_none() {
// Check if the current walker key is unchecked and there's no previous hashed key
if !self.current_walker_key_checked && self.previous_hashed_key.is_none() {
self.current_walker_key_checked = true;
// If it's possible to skip the current node in the walker, return a branch node
if self.walker.can_skip_current_node {
return Ok(Some(AccountNode::Branch(TrieBranchNode::new(
return Ok(Some(TrieElement::Branch(TrieBranchNode::new(
key.clone(),
self.walker.hash().unwrap(),
self.walker.children_are_in_trie(),
Expand All @@ -121,26 +102,26 @@ where
}
}

// If there's a hashed address and account...
if let Some((hashed_address, account)) = self.current_hashed_entry.take() {
// If the walker's key is less than the unpacked hashed address, reset the checked
// status and continue
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_address)) {
// If there's a hashed entry...
if let Some((hashed_key, value)) = self.current_hashed_entry.take() {
// If the walker's key is less than the unpacked hashed key,
// reset the checked status and continue
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_key)) {
self.current_walker_key_checked = false;
continue
}

// Set the next hashed entry as a leaf node and return
self.current_hashed_entry = self.hashed_account_cursor.next()?;
return Ok(Some(AccountNode::Leaf(hashed_address, account)))
self.current_hashed_entry = self.hashed_cursor.next()?;
return Ok(Some(TrieElement::Leaf(hashed_key, value)))
}

// Handle seeking and advancing based on the previous account key
match self.previous_account_key.take() {
Some(account_key) => {
// Seek to the previous account key and get the next hashed entry
self.hashed_account_cursor.seek(account_key)?;
self.current_hashed_entry = self.hashed_account_cursor.next()?;
// Handle seeking and advancing based on the previous hashed key
match self.previous_hashed_key.take() {
Some(hashed_key) => {
// Seek to the previous hashed key and get the next hashed entry
self.hashed_cursor.seek(hashed_key)?;
self.current_hashed_entry = self.hashed_cursor.next()?;
}
None => {
// Get the seek key and set the current hashed entry based on walker's next
Expand All @@ -149,7 +130,7 @@ where
Some(key) => key,
None => break, // no more keys
};
self.current_hashed_entry = self.hashed_account_cursor.seek(seek_key)?;
self.current_hashed_entry = self.hashed_cursor.seek(seek_key)?;
self.walker.advance()?;
}
}
Expand All @@ -158,91 +139,3 @@ where
Ok(None)
}
}

/// An iterator over existing intermediate storage branch nodes and updated leaf nodes.
#[derive(Debug)]
pub struct StorageNodeIter<C, H> {
/// Underlying walker over intermediate nodes.
pub walker: TrieWalker<C>,
/// The cursor for the hashed storage entries.
pub hashed_storage_cursor: H,

/// Current hashed storage entry.
current_hashed_entry: Option<(B256, U256)>,
/// Flag indicating whether we should check the current walker key.
current_walker_key_checked: bool,
}

impl<C, H> StorageNodeIter<C, H> {
/// Creates a new instance of StorageNodeIter.
pub fn new(walker: TrieWalker<C>, hashed_storage_cursor: H) -> Self {
Self {
walker,
hashed_storage_cursor,
current_walker_key_checked: false,
current_hashed_entry: None,
}
}
}

impl<C, H> StorageNodeIter<C, H>
where
C: TrieCursor,
H: HashedStorageCursor<Value = U256>,
{
/// Return the next storage trie node to be added to the hash builder.
///
/// Returns the nodes using this algorithm:
/// 1. Return the current intermediate branch node if it hasn't been updated.
/// 2. Advance the trie walker to the next intermediate branch node and retrieve next
/// unprocessed key.
/// 3. Reposition the hashed storage cursor on the next unprocessed key.
/// 4. Return every hashed storage entry up to the key of the current intermediate branch node.
/// 5. Repeat.
pub fn try_next(&mut self) -> Result<Option<StorageNode>, DatabaseError> {
loop {
// Check if there's a key in the walker.
if let Some(key) = self.walker.key() {
// Check if the walker key hasn't been checked yet.
if !self.current_walker_key_checked {
self.current_walker_key_checked = true;
// Check if the current node can be skipped in the walker.
if self.walker.can_skip_current_node {
// Return a branch node based on the walker's properties.
return Ok(Some(StorageNode::Branch(TrieBranchNode::new(
key.clone(),
self.walker.hash().unwrap(),
self.walker.children_are_in_trie(),
))))
}
}
}

// Check for a current hashed storage entry.
if let Some((hashed_key, value)) = self.current_hashed_entry.take() {
// Compare keys and proceed accordingly.
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_key)) {
self.current_walker_key_checked = false;
continue
}

// Move to the next hashed storage entry and return the corresponding leaf node.
self.current_hashed_entry = self.hashed_storage_cursor.next()?;
return Ok(Some(StorageNode::Leaf(hashed_key, value)))
}

// Attempt to get the next unprocessed key from the walker.
match self.walker.next_unprocessed_key() {
Some(seek_key) => {
// Seek and update the current hashed entry based on the new seek key.
self.current_hashed_entry = self.hashed_storage_cursor.seek(seek_key)?;
self.walker.advance()?;
}
// No more keys to process, break the loop.
None => break,
};
}

Ok(None) // Return None if no more nodes are available.
}
}
Loading

0 comments on commit 12cbd55

Please sign in to comment.