diff --git a/core/state/journal.go b/core/state/journal.go index 20f18fb98..44ae7faf6 100644 --- a/core/state/journal.go +++ b/core/state/journal.go @@ -90,8 +90,18 @@ type ( account *common.Address } resetObjectChange struct { + account *common.Address prev *stateObject prevdestruct bool + + // tracking previous states of accounts and storages in snapshot, before each transaction + prevAccount []byte + prevStorage map[common.Hash][]byte + + // tracking previous states of accounts and storages in trie, before each commit + prevAccountOriginExist bool + prevAccountOrigin []byte + prevStorageOrigin map[common.Hash][]byte } selfDestructChange struct { account *common.Address @@ -157,12 +167,21 @@ func (ch createObjectChange) dirtied() *common.Address { func (ch resetObjectChange) revert(s *StateDB) { s.setStateObject(ch.prev) if !ch.prevdestruct && s.snap != nil { - delete(s.snapDestructs, ch.prev.addrHash) + delete(s.stateObjectsDestruct, ch.prev.address) + } + if ch.prevAccountOriginExist { + s.accountsOrigin[ch.prev.addrHash] = ch.prevAccountOrigin + } + if ch.prevAccount != nil { + s.accounts[ch.prev.addrHash] = ch.prevAccount + } + if ch.prevStorage != nil { + s.storages[ch.prev.addrHash] = ch.prevStorage } } func (ch resetObjectChange) dirtied() *common.Address { - return nil + return ch.account } func (ch selfDestructChange) revert(s *StateDB) { diff --git a/core/state/metrics.go b/core/state/metrics.go index e702ef3a8..64c651461 100644 --- a/core/state/metrics.go +++ b/core/state/metrics.go @@ -27,4 +27,11 @@ var ( storageTriesUpdatedMeter = metrics.NewRegisteredMeter("state/update/storagenodes", nil) accountTrieDeletedMeter = metrics.NewRegisteredMeter("state/delete/accountnodes", nil) storageTriesDeletedMeter = metrics.NewRegisteredMeter("state/delete/storagenodes", nil) + + slotDeletionMaxCount = metrics.NewRegisteredGauge("state/delete/storage/max/slot", nil) + slotDeletionMaxSize = metrics.NewRegisteredGauge("state/delete/storage/max/size", nil) + slotDeletionTimer = metrics.NewRegisteredResettingTimer("state/delete/storage/timer", nil) + slotDeletionCount = metrics.NewRegisteredMeter("state/delete/storage/slot", nil) + slotDeletionSize = metrics.NewRegisteredMeter("state/delete/storage/size", nil) + slotDeletionSkip = metrics.NewRegisteredGauge("state/delete/storage/skip", nil) ) diff --git a/core/state/state_object.go b/core/state/state_object.go index 3adcff01e..0abc03d7f 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -62,13 +62,14 @@ func (s Storage) Copy() Storage { // // The usage pattern is as follows: // First you need to obtain a state object. -// Account values can be accessed and modified through the object. -// Finally, call CommitTrie to write the modified storage trie into a database. +// Account values as well as storages can be accessed and modified through the object. +// Finally, call commit to return the changes of storage trie and update account data. type stateObject struct { - address common.Address - addrHash common.Hash // hash of ethereum address of the account - data types.StateAccount + address common.Address // address of the account + addrHash common.Hash // hash of ethereum address of the account + data types.StateAccount // Account data with all mutations applied in the scope of block db *StateDB + origin *types.StateAccount // Account original data without any change applied, nil means it was not existent before // DB error. // State objects are used by the consensus core and VM which are @@ -81,17 +82,22 @@ type stateObject struct { trie Trie // storage trie, which becomes non-nil on first access code Code // contract bytecode, which gets set when code is loaded - originStorage Storage // Storage cache of original entries to dedup rewrites, reset for every transaction + originStorage Storage // Storage cache of original entries to dedup rewrites pendingStorage Storage // Storage entries that need to be flushed to disk, at the end of an entire block - dirtyStorage Storage // Storage entries that have been modified in the current transaction execution + dirtyStorage Storage // Storage entries that have been modified in the current transaction execution, reset for every transaction fakeStorage Storage // Fake storage which constructed by caller for debugging purpose. // Cache flags. - // When an object is marked self-destructed it will be delete from the trie - // during the "update" phase of the state transition. - dirtyCode bool // true if the code was updated + dirtyCode bool // true if the code was updated + + // Flag whether the account was marked as selfDestructed. The selfDestructed account + // is still accessible in the scope of same transaction. selfDestructed bool - deleted bool + + // Flag whether the account was marked as deleted. The selfDestructed account + // or the account is considered as empty will be marked as deleted at + // the end of transaction and no longer accessible anymore. + deleted bool // Flag whether the object was created in the current transaction created bool @@ -103,21 +109,19 @@ func (s *stateObject) empty() bool { } // newObject creates a state object. -func newObject(db *StateDB, address common.Address, data types.StateAccount) *stateObject { - if data.Balance == nil { - data.Balance = new(big.Int) - } - if data.CodeHash == nil { - data.CodeHash = emptyCodeHash - } - if data.Root == (common.Hash{}) { - data.Root = emptyRoot +func newObject(db *StateDB, address common.Address, acct *types.StateAccount) *stateObject { + // origin is supposed to not be changed directly but only be reassigned with the current state when committing + // so it's safe to use pointer here. + origin := acct + if acct == nil { + acct = types.NewEmptyStateAccount() } return &stateObject{ db: db, address: address, addrHash: crypto.Keccak256Hash(address[:]), - data: data, + origin: origin, + data: *acct, originStorage: make(Storage), pendingStorage: make(Storage), dirtyStorage: make(Storage), @@ -227,7 +231,7 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has // 1) resurrect happened, and new slot values were set -- those should // have been handles via pendingStorage above. // 2) we don't have new values, and can deliver empty response back - if _, destructed := s.db.snapDestructs[s.addrHash]; destructed { + if _, destructed := s.db.stateObjectsDestruct[s.address]; destructed { return common.Hash{} } enc, err = s.db.snap.Storage(s.addrHash, crypto.Keccak256Hash(key.Bytes())) @@ -334,7 +338,10 @@ func (s *stateObject) updateTrie(db Database) Trie { defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) } // The snapshot storage map for the object - var storage map[common.Hash][]byte + var ( + storage map[common.Hash][]byte + origin map[common.Hash][]byte + ) // Insert all the pending updates into the trie tr := s.getTrie(db) hasher := s.db.hasher @@ -345,6 +352,7 @@ func (s *stateObject) updateTrie(db Database) Trie { if value == s.originStorage[key] { continue } + prev := s.originStorage[key] s.originStorage[key] = value var v []byte @@ -357,17 +365,35 @@ func (s *stateObject) updateTrie(db Database) Trie { s.setError(tr.TryUpdate(key[:], v)) s.db.StorageUpdated += 1 } - // If state snapshotting is active, cache the data til commit - if s.db.snap != nil { - if storage == nil { - // Retrieve the old storage map, if available, create a new one otherwise - if storage = s.db.snapStorage[s.addrHash]; storage == nil { - storage = make(map[common.Hash][]byte) - s.db.snapStorage[s.addrHash] = storage - } + // Cache the mutated storage slots until commit + if storage == nil { + // Retrieve the old storage map, if available, create a new one otherwise + if storage = s.db.storages[s.addrHash]; storage == nil { + storage = make(map[common.Hash][]byte) + s.db.storages[s.addrHash] = storage } - storage[crypto.HashData(hasher, key[:])] = v // v will be nil if it's deleted } + khash := crypto.HashData(hasher, key[:]) + storage[khash] = v // v will be nil if it's deleted + // Cache the original value of mutated storage slots + if origin == nil { + if origin = s.db.storagesOrigin[s.addrHash]; origin == nil { + origin = make(map[common.Hash][]byte) + s.db.storagesOrigin[s.addrHash] = origin + } + } + // Track the original value of slot only if it's mutated first time + if _, ok := origin[khash]; !ok { + if prev == (common.Hash{}) { + origin[khash] = nil // nil if it was not present previously + } else { + // Encoding []byte cannot fail, ok to ignore the error. + b, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(prev[:])) + origin[khash] = b + } + } + + // Cache the items for preloading usedStorage = append(usedStorage, common.CopyBytes(key[:])) // Copy needed for closure } if s.db.prefetcher != nil { @@ -392,11 +418,11 @@ func (s *stateObject) updateRoot(db Database) { s.data.Root = s.trie.Hash() } -// commitTrie submits the storage changes into the storage trie and re-computes -// the root. Besides, all trie changes will be collected in a nodeset and returned. -func (s *stateObject) commitTrie(db Database) (*trienode.NodeSet, error) { +// commit returns the changes made in storage trie and updates the account data. +func (s *stateObject) commit(db Database) (*trienode.NodeSet, error) { // If nothing changed, don't bother with hashing anything if s.updateTrie(db) == nil { + s.origin = s.data.Copy() // Update original account data after commit return nil, nil } if s.dbErr != nil { @@ -410,6 +436,8 @@ func (s *stateObject) commitTrie(db Database) (*trienode.NodeSet, error) { if err == nil { s.data.Root = root } + // Update original account data after commit + s.origin = s.data.Copy() return nodes, err } @@ -449,7 +477,13 @@ func (s *stateObject) setBalance(amount *big.Int) { } func (s *stateObject) deepCopy(db *StateDB) *stateObject { - stateObject := newObject(db, s.address, s.data) + stateObject := &stateObject{ + db: db, + address: s.address, + addrHash: s.addrHash, + origin: s.origin, + data: s.data, + } if s.trie != nil { stateObject.trie = db.db.CopyTrie(s.trie) } diff --git a/core/state/statedb.go b/core/state/statedb.go index c15613de4..c2566c56e 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/rlp" "github.com/ethereum/go-ethereum/trie" "github.com/ethereum/go-ethereum/trie/trienode" + "github.com/ethereum/go-ethereum/trie/triestate" ) type revision struct { @@ -73,16 +74,21 @@ type StateDB struct { // It will be updated when the Commit is called. originalRoot common.Hash - snaps *snapshot.Tree - snap snapshot.Snapshot - snapDestructs map[common.Hash]struct{} - snapAccounts map[common.Hash][]byte - snapStorage map[common.Hash]map[common.Hash][]byte + snaps *snapshot.Tree + snap snapshot.Snapshot + + // These maps hold the state changes (including the corresponding + // original value) that occurred in this **block**. + accounts map[common.Hash][]byte // The mutated accounts in 'slim RLP' encoding + storages map[common.Hash]map[common.Hash][]byte // The mutated slots in prefix-zero trimmed rlp format + accountsOrigin map[common.Hash][]byte // The original value of mutated accounts in 'slim RLP' encoding + storagesOrigin map[common.Hash]map[common.Hash][]byte // The original value of mutated slots in prefix-zero trimmed rlp format // This map holds 'live' objects, which will get modified while processing a state transition. - stateObjects map[common.Address]*stateObject - stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie - stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjects map[common.Address]*stateObject + stateObjectsPending map[common.Address]struct{} // State objects finalized but not yet written to the trie + stateObjectsDirty map[common.Address]struct{} // State objects modified in the current execution + stateObjectsDestruct map[common.Address]*types.StateAccount // State objects destructed in the block along with its previous value // DB error. // State objects are used by the consensus core and VM which are @@ -140,26 +146,27 @@ func New(root common.Hash, db Database, snaps *snapshot.Tree) (*StateDB, error) return nil, err } sdb := &StateDB{ - db: db, - trie: tr, - originalRoot: root, - snaps: snaps, - stateObjects: make(map[common.Address]*stateObject), - stateObjectsPending: make(map[common.Address]struct{}), - stateObjectsDirty: make(map[common.Address]struct{}), - logs: make(map[common.Hash][]*types.Log), - preimages: make(map[common.Hash][]byte), - journal: newJournal(), - accessList: newAccessList(), - transientStorage: newTransientStorage(), - hasher: crypto.NewKeccakState(), + db: db, + trie: tr, + originalRoot: root, + snaps: snaps, + accounts: make(map[common.Hash][]byte), + storages: make(map[common.Hash]map[common.Hash][]byte), + accountsOrigin: make(map[common.Hash][]byte), + storagesOrigin: make(map[common.Hash]map[common.Hash][]byte), + stateObjects: make(map[common.Address]*stateObject), + stateObjectsPending: make(map[common.Address]struct{}), + stateObjectsDirty: make(map[common.Address]struct{}), + stateObjectsDestruct: make(map[common.Address]*types.StateAccount), + logs: make(map[common.Hash][]*types.Log), + preimages: make(map[common.Hash][]byte), + journal: newJournal(), + accessList: newAccessList(), + transientStorage: newTransientStorage(), + hasher: crypto.NewKeccakState(), } if sdb.snaps != nil { - if sdb.snap = sdb.snaps.Snapshot(root); sdb.snap != nil { - sdb.snapDestructs = make(map[common.Hash]struct{}) - sdb.snapAccounts = make(map[common.Hash][]byte) - sdb.snapStorage = make(map[common.Hash]map[common.Hash][]byte) - } + sdb.snap = sdb.snaps.Snapshot(root) } return sdb, nil } @@ -431,6 +438,14 @@ func (s *StateDB) SetState(addr common.Address, key, value common.Hash) { // SetStorage replaces the entire storage for the specified account with given // storage. This function should only be used for debugging. func (s *StateDB) SetStorage(addr common.Address, storage map[common.Hash]common.Hash) { + // SetStorage needs to wipe existing storage. We achieve this by pretending + // that the account self-destructed earlier in this block, by flagging + // it in stateObjectsDestruct. The effect of doing so is that storage lookups + // will not hit disk, since it is assumed that the disk-data is belonging + // to a previous incarnation of the object. + if _, ok := s.stateObjectsDestruct[addr]; !ok { + s.stateObjectsDestruct[addr] = nil + } stateObject := s.GetOrNewStateObject(addr) if stateObject != nil { stateObject.SetStorage(storage) @@ -514,12 +529,20 @@ func (s *StateDB) updateStateObject(obj *stateObject) { s.setError(fmt.Errorf("updateStateObject (%x) error: %v", addr[:], err)) } - // If state snapshotting is active, cache the data til commit. Note, this - // update mechanism is not symmetric to the deletion, because whereas it is - // enough to track account updates at commit time, deletions need tracking - // at transaction boundary level to ensure we capture state clearing. - if s.snap != nil { - s.snapAccounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + // Cache the data until commit. Note, this update mechanism is not symmetric + // to the deletion, because whereas it is enough to track account updates + // at commit time, deletions need tracking at transaction boundary level to + // ensure we capture state clearing. + s.accounts[obj.addrHash] = snapshot.SlimAccountRLP(obj.data.Nonce, obj.data.Balance, obj.data.Root, obj.data.CodeHash) + // Track the original value of mutated account, nil means it was not present. + // Skip if it has been tracked (because updateStateObject may be called + // multiple times in a block). + if _, ok := s.accountsOrigin[obj.addrHash]; !ok { + if obj.origin == nil { + s.accountsOrigin[obj.addrHash] = nil + } else { + s.accountsOrigin[obj.addrHash] = snapshot.SlimAccountRLP(obj.origin.Nonce, obj.origin.Balance, obj.origin.Root, obj.origin.CodeHash) + } } } @@ -603,7 +626,7 @@ func (s *StateDB) getDeletedStateObject(addr common.Address) *stateObject { } } // Insert into the live set - obj := newObject(s, addr, *data) + obj := newObject(s, addr, data) s.setStateObject(obj) return obj } @@ -624,20 +647,38 @@ func (s *StateDB) GetOrNewStateObject(addr common.Address) *stateObject { // createObject creates a new state object. If there is an existing account with // the given address, it is overwritten and returned as the second return value. func (s *StateDB) createObject(addr common.Address) (newobj, prev *stateObject) { + // The original account should be marked as destructed and all cached + // account and storage data should be cleared as well. Note, it must + // be done here, otherwise the destruction event of original one will + // be lost. prev = s.getDeletedStateObject(addr) // Note, prev might have been deleted, we need that! var prevdestruct bool - if s.snap != nil && prev != nil { - _, prevdestruct = s.snapDestructs[prev.addrHash] + if prev != nil { + _, prevdestruct = s.stateObjectsDestruct[prev.address] if !prevdestruct { - s.snapDestructs[prev.addrHash] = struct{}{} + s.stateObjectsDestruct[prev.address] = prev.origin } } - newobj = newObject(s, addr, types.StateAccount{}) + newobj = newObject(s, addr, nil) if prev == nil { s.journal.append(createObjectChange{account: &addr}) } else { - s.journal.append(resetObjectChange{prev: prev, prevdestruct: prevdestruct}) + prevAccount, ok := s.accountsOrigin[prev.addrHash] + s.journal.append(resetObjectChange{ + account: &addr, + prev: prev, + prevdestruct: prevdestruct, + prevAccount: s.accounts[prev.addrHash], + prevStorage: s.storages[prev.addrHash], + prevAccountOriginExist: ok, + prevAccountOrigin: prevAccount, + prevStorageOrigin: s.storagesOrigin[prev.addrHash], + }) + delete(s.accounts, prev.addrHash) + delete(s.storages, prev.addrHash) + delete(s.accountsOrigin, prev.addrHash) + delete(s.storagesOrigin, prev.addrHash) } newobj.created = true @@ -712,18 +753,30 @@ func (s *StateDB) Blacklisted(contractAddr *common.Address, addr *common.Address func (s *StateDB) Copy() *StateDB { // Copy all the basic fields, initialize the memory ones state := &StateDB{ - db: s.db, - trie: s.db.CopyTrie(s.trie), - originalRoot: s.originalRoot, - stateObjects: make(map[common.Address]*stateObject, len(s.journal.dirties)), - stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)), - stateObjectsDirty: make(map[common.Address]struct{}, len(s.journal.dirties)), - refund: s.refund, - logs: make(map[common.Hash][]*types.Log, len(s.logs)), - logSize: s.logSize, - preimages: make(map[common.Hash][]byte, len(s.preimages)), - journal: newJournal(), - hasher: crypto.NewKeccakState(), + db: s.db, + trie: s.db.CopyTrie(s.trie), + originalRoot: s.originalRoot, + accounts: copySet(s.accounts), + storages: copy2DSet(s.storages), + accountsOrigin: copySet(s.accountsOrigin), + storagesOrigin: copy2DSet(s.storagesOrigin), + stateObjects: make(map[common.Address]*stateObject, len(s.journal.dirties)), + stateObjectsPending: make(map[common.Address]struct{}, len(s.stateObjectsPending)), + stateObjectsDirty: make(map[common.Address]struct{}, len(s.journal.dirties)), + stateObjectsDestruct: make(map[common.Address]*types.StateAccount, len(s.stateObjectsDestruct)), + refund: s.refund, + logs: make(map[common.Hash][]*types.Log, len(s.logs)), + logSize: s.logSize, + preimages: make(map[common.Hash][]byte, len(s.preimages)), + journal: newJournal(), + hasher: crypto.NewKeccakState(), + + // In order for the block producer to be able to use and make additions + // to the snapshot tree, we need to copy that as well. Otherwise, any + // block mined by ourselves will cause gaps in the tree, and force the + // miner to operate trie-backed only. + snaps: s.snaps, + snap: s.snap, } // Copy the dirty states, logs, and preimages for addr := range s.journal.dirties { @@ -756,6 +809,12 @@ func (s *StateDB) Copy() *StateDB { } state.stateObjectsDirty[addr] = struct{}{} } + + // Deep copy the destruction markers. + for addr, value := range s.stateObjectsDestruct { + state.stateObjectsDestruct[addr] = value + } + for hash, logs := range s.logs { cpy := make([]*types.Log, len(logs)) for i, l := range logs { @@ -782,31 +841,7 @@ func (s *StateDB) Copy() *StateDB { if s.prefetcher != nil { state.prefetcher = s.prefetcher.copy() } - if s.snaps != nil { - // In order for the miner to be able to use and make additions - // to the snapshot tree, we need to copy that aswell. - // Otherwise, any block mined by ourselves will cause gaps in the tree, - // and force the miner to operate trie-backed only - state.snaps = s.snaps - state.snap = s.snap - // deep copy needed - state.snapDestructs = make(map[common.Hash]struct{}) - for k, v := range s.snapDestructs { - state.snapDestructs[k] = v - } - state.snapAccounts = make(map[common.Hash][]byte) - for k, v := range s.snapAccounts { - state.snapAccounts[k] = v - } - state.snapStorage = make(map[common.Hash]map[common.Hash][]byte) - for k, v := range s.snapStorage { - temp := make(map[common.Hash][]byte) - for kk, vv := range v { - temp[kk] = vv - } - state.snapStorage[k] = temp - } - } + return state } @@ -858,15 +893,20 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { if obj.selfDestructed || (deleteEmptyObjects && obj.empty()) { obj.deleted = true - // If state snapshotting is active, also mark the destruction there. + // We need to maintain account deletions explicitly (will remain + // set indefinitely). Note only the first occurred self-destruct + // event is tracked. + if _, ok := s.stateObjectsDestruct[obj.address]; !ok { + s.stateObjectsDestruct[obj.address] = obj.origin + } + // Note, we can't do this only at the end of a block because multiple // transactions within the same block might self destruct and then // ressurrect an account; but the snapshotter needs both events. - if s.snap != nil { - s.snapDestructs[obj.addrHash] = struct{}{} // We need to maintain account deletions explicitly (will remain set indefinitely) - delete(s.snapAccounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a ressurrect) - delete(s.snapStorage, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a ressurrect) - } + delete(s.accounts, obj.addrHash) // Clear out any previously updated account data (may be recreated via a resurrect) + delete(s.storages, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a resurrect) + delete(s.accountsOrigin, obj.addrHash) // Clear out any previously updated account data (may be recreated via a resurrect) + delete(s.storagesOrigin, obj.addrHash) // Clear out any previously updated storage data (may be recreated via a resurrect) } else { obj.finalise(true) // Prefetch slots in the background } @@ -949,6 +989,134 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { return s.trie.Hash() } +// deleteStorage iterates the storage trie belongs to the account and mark all +// slots inside as deleted. +func (s *StateDB) deleteStorage(addr common.Address, addrHash common.Hash, root common.Hash) (bool, map[common.Hash][]byte, *trienode.NodeSet, error) { + start := time.Now() + tr, err := s.db.OpenStorageTrie(s.originalRoot, addrHash, root) + if err != nil { + return false, nil, nil, fmt.Errorf("failed to open storage trie, err: %w", err) + } + it := tr.NodeIterator(nil) + var ( + set = trienode.NewNodeSet(addrHash) + slots = make(map[common.Hash][]byte) + stateSize common.StorageSize + nodeSize common.StorageSize + ) + for it.Next(true) { + // arbitrary stateSize limit, make it configurable + if stateSize+nodeSize > 512*1024*1024 { + log.Info("Skip large storage deletion", "address", addr.Hex(), "states", stateSize, "nodes", nodeSize) + if metrics.EnabledExpensive { + slotDeletionSkip.Inc(1) + } + return true, nil, nil, nil + } + if it.Leaf() { + slots[common.BytesToHash(it.LeafKey())] = common.CopyBytes(it.LeafBlob()) + stateSize += common.StorageSize(common.HashLength + len(it.LeafBlob())) + continue + } + if it.Hash() == (common.Hash{}) { + continue + } + nodeSize += common.StorageSize(len(it.Path()) + len(it.NodeBlob())) + set.AddNode(it.Path(), trienode.NewNodeWithPrev(common.Hash{}, nil, it.NodeBlob())) + } + if err := it.Error(); err != nil { + return false, nil, nil, err + } + if metrics.EnabledExpensive { + if int64(len(slots)) > slotDeletionMaxCount.Value() { + slotDeletionMaxCount.Update(int64(len(slots))) + } + if int64(stateSize+nodeSize) > slotDeletionMaxSize.Value() { + slotDeletionMaxSize.Update(int64(stateSize + nodeSize)) + } + slotDeletionTimer.UpdateSince(start) + slotDeletionCount.Mark(int64(len(slots))) + slotDeletionSize.Mark(int64(stateSize + nodeSize)) + } + return false, slots, set, nil +} + +// handleDestruction processes all destruction markers and deletes the account +// and associated storage slots if necessary. There are four possible situations +// here: +// +// - the account was not existent and be marked as destructed +// +// - the account was not existent and be marked as destructed, +// however, it's resurrected later in the same block. +// +// - the account was existent and be marked as destructed +// +// - the account was existent and be marked as destructed, +// however it's resurrected later in the same block. +// +// In case (a), nothing needs be deleted, nil to nil transition can be ignored. +// +// In case (b), nothing needs be deleted, nil is used as the original value for +// newly created account and storages +// +// In case (c), **original** account along with its storages should be deleted, +// with their values be tracked as original value. +// +// In case (d), **original** account along with its storages should be deleted, +// with their values be tracked as original value. +func (s *StateDB) handleDestruction(nodes *trienode.MergedNodeSet) (map[common.Hash]struct{}, error) { + incomplete := make(map[common.Hash]struct{}) + for addr, prev := range s.stateObjectsDestruct { + // The original account was non-existing, and it's marked as destructed + // in the scope of block. It can be case (a) or (b). + // - for (a), skip it without doing anything. + // - for (b), track account's original value as nil. It may overwrite + // the data cached in s.accountsOrigin set by 'updateStateObject'. + addrHash := crypto.Keccak256Hash(addr[:]) + if prev == nil { + if _, ok := s.accounts[addrHash]; ok { + s.accountsOrigin[addrHash] = nil // case (b) + } + continue + } + // It can overwrite the data in s.accountsOrigin set by 'updateStateObject'. + s.accountsOrigin[addrHash] = snapshot.SlimAccountRLP(prev.Nonce, prev.Balance, prev.Root, prev.CodeHash) // case (c) or (d) + + // Short circuit if the storage was empty. + if prev.Root == types.EmptyRootHash { + continue + } + // Remove storage slots belong to the account. + aborted, slots, set, err := s.deleteStorage(addr, addrHash, prev.Root) + if err != nil { + return nil, fmt.Errorf("failed to delete storage, err: %w", err) + } + // The storage is too huge to handle, skip it but mark as incomplete. + // For case (d), the account is resurrected might with a few slots + // created. In this case, wipe the entire storage state diff because + // of aborted deletion. + if aborted { + incomplete[addrHash] = struct{}{} + delete(s.storagesOrigin, addrHash) + continue + } + if s.storagesOrigin[addrHash] == nil { + s.storagesOrigin[addrHash] = slots + } else { + // It can overwrite the data in s.storagesOrigin[addrHash] set by + // 'object.updateTrie'. + for key, val := range slots { + s.storagesOrigin[addrHash][key] = val + } + } + if err := nodes.Merge(set); err != nil { + return nil, err + } + } + return incomplete, nil +} + // SetTxContext sets the current transaction hash and index which are // used when the EVM emits new state logs. It should be invoked before // transaction execution. @@ -982,6 +1150,12 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { nodes = trienode.NewMergedNodeSet() ) codeWriter := s.db.TrieDB().DiskDB().NewBatch() + // Handle all state deletions first + incomplete, err := s.handleDestruction(nodes) + if err != nil { + return common.Hash{}, err + } + // Handle all state updates afterwards for addr := range s.stateObjectsDirty { if obj := s.stateObjects[addr]; !obj.deleted { // Write any contract code associated with the state object @@ -990,7 +1164,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { obj.dirtyCode = false } // Write any storage changes in the state object to its storage trie - nodeSet, err := obj.commitTrie(s.db) + nodeSet, err := obj.commit(s.db) if err != nil { return common.Hash{}, err } @@ -1006,15 +1180,6 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } } } - // If the contract is destructed, the storage is still left in the - // database as dangling data. Theoretically it's should be wiped from - // database as well, but in hash-based-scheme it's extremely hard to - // determine that if the trie nodes are also referenced by other storage, - // and in path-based-scheme some technical challenges are still unsolved. - // Although it won't affect the correctness but please fix it TODO(rjl493456442). - if len(s.stateObjectsDirty) > 0 { - s.stateObjectsDirty = make(map[common.Address]struct{}) - } if codeWriter.ValueSize() > 0 { if err := codeWriter.Write(); err != nil { log.Crit("Failed to commit dirty codes", "error", err) @@ -1056,7 +1221,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { start := time.Now() // Only update if there's a state transition (skip empty Clique blocks) if parent := s.snap.Root(); parent != root { - if err := s.snaps.Update(root, parent, s.snapDestructs, s.snapAccounts, s.snapStorage); err != nil { + if err := s.snaps.Update(root, parent, s.convertAccountSet(s.stateObjectsDestruct), s.accounts, s.storages); err != nil { log.Warn("Failed to update snapshot tree", "from", parent, "to", root, "err", err) } // Keep 128 diff layers in the memory, persistent layer is 129th. @@ -1067,7 +1232,7 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { log.Warn("Failed to cap snapshot tree", "root", root, "layers", 128, "err", err) } } - s.snap, s.snapDestructs, s.snapAccounts, s.snapStorage = nil, nil, nil, nil + s.snap = nil if metrics.EnabledExpensive { s.SnapshotCommits += time.Since(start) } @@ -1083,7 +1248,12 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { } if root != origin { start := time.Now() - if err := s.db.TrieDB().Update(root, origin, nodes); err != nil { + set := &triestate.Set{ + Accounts: s.accountsOrigin, + Storages: s.storagesOrigin, + Incomplete: incomplete, + } + if err := s.db.TrieDB().Update(root, origin, nodes, set); err != nil { return common.Hash{}, err } s.originalRoot = root @@ -1091,6 +1261,13 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (common.Hash, error) { s.TrieDBCommits += time.Since(start) } } + // Clear all internal flags at the end of commit operation. + s.accounts = make(map[common.Hash][]byte) + s.storages = make(map[common.Hash]map[common.Hash][]byte) + s.accountsOrigin = make(map[common.Hash][]byte) + s.storagesOrigin = make(map[common.Hash]map[common.Hash][]byte) + s.stateObjectsDirty = make(map[common.Address]struct{}) + s.stateObjectsDestruct = make(map[common.Address]*types.StateAccount) return root, nil } @@ -1197,3 +1374,38 @@ func (s *StateDB) DirtyAccounts(hash common.Hash, number uint64) []*types.DirtyS return dirtyAccounts } + +// convertAccountSet converts a provided account set from address keyed to hash keyed. +func (s *StateDB) convertAccountSet(set map[common.Address]*types.StateAccount) map[common.Hash]struct{} { + ret := make(map[common.Hash]struct{}, len(set)) + for addr := range set { + obj, exist := s.stateObjects[addr] + if !exist { + ret[crypto.Keccak256Hash(addr[:])] = struct{}{} + } else { + ret[obj.addrHash] = struct{}{} + } + } + return ret +} + +// copySet returns a deep-copied set. +func copySet[k comparable](set map[k][]byte) map[k][]byte { + copied := make(map[k][]byte, len(set)) + for key, val := range set { + copied[key] = common.CopyBytes(val) + } + return copied +} + +// copy2DSet returns a two-dimensional deep-copied set. +func copy2DSet[k comparable](set map[k]map[common.Hash][]byte) map[k]map[common.Hash][]byte { + copied := make(map[k]map[common.Hash][]byte, len(set)) + for addr, subset := range set { + copied[addr] = make(map[common.Hash][]byte, len(subset)) + for key, val := range subset { + copied[addr][key] = common.CopyBytes(val) + } + } + return copied +} diff --git a/core/state/statedb_fuzz_test.go b/core/state/statedb_fuzz_test.go new file mode 100644 index 000000000..0f627b2d6 --- /dev/null +++ b/core/state/statedb_fuzz_test.go @@ -0,0 +1,362 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see + +package state + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math" + "math/big" + "math/rand" + "reflect" + "strings" + "testing" + "testing/quick" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/state/snapshot" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triestate" +) + +// A stateTest checks that the state changes are correctly captured. Instances +// of this test with pseudorandom content are created by Generate. +// +// The test works as follows: +// +// A list of states are created by applying actions. The state changes between +// each state instance are tracked and be verified. +type stateTest struct { + addrs []common.Address // all account addresses + actions [][]testAction // modifications to the state, grouped by block + chunk int // The number of actions per chunk + err error // failure details are reported through this field +} + +// newStateTestAction creates a random action that changes state. +func newStateTestAction(addr common.Address, r *rand.Rand, index int) testAction { + actions := []testAction{ + { + name: "SetBalance", + fn: func(a testAction, s *StateDB) { + s.SetBalance(addr, big.NewInt(a.args[0])) + }, + args: make([]int64, 1), + }, + { + name: "SetNonce", + fn: func(a testAction, s *StateDB) { + s.SetNonce(addr, uint64(a.args[0])) + }, + args: make([]int64, 1), + }, + { + name: "SetState", + fn: func(a testAction, s *StateDB) { + var key, val common.Hash + binary.BigEndian.PutUint16(key[:], uint16(a.args[0])) + binary.BigEndian.PutUint16(val[:], uint16(a.args[1])) + s.SetState(addr, key, val) + }, + args: make([]int64, 2), + }, + { + name: "SetCode", + fn: func(a testAction, s *StateDB) { + code := make([]byte, 16) + binary.BigEndian.PutUint64(code, uint64(a.args[0])) + binary.BigEndian.PutUint64(code[8:], uint64(a.args[1])) + s.SetCode(addr, code) + }, + args: make([]int64, 2), + }, + { + name: "CreateAccount", + fn: func(a testAction, s *StateDB) { + s.CreateAccount(addr) + }, + }, + { + name: "Suicide", + fn: func(a testAction, s *StateDB) { + s.SelfDestruct(addr) + }, + }, + } + var nonRandom = index != -1 + if index == -1 { + index = r.Intn(len(actions)) + } + action := actions[index] + var names []string + if !action.noAddr { + names = append(names, addr.Hex()) + } + for i := range action.args { + if nonRandom { + action.args[i] = rand.Int63n(10000) + 1 // set balance to non-zero + } else { + action.args[i] = rand.Int63n(10000) + } + names = append(names, fmt.Sprint(action.args[i])) + } + action.name += " " + strings.Join(names, ", ") + return action +} + +// Generate returns a new snapshot test of the given size. All randomness is +// derived from r. +func (*stateTest) Generate(r *rand.Rand, size int) reflect.Value { + addrs := make([]common.Address, 5) + for i := range addrs { + addrs[i][0] = byte(i) + } + actions := make([][]testAction, rand.Intn(5)+1) + + for i := 0; i < len(actions); i++ { + actions[i] = make([]testAction, size) + for j := range actions[i] { + if j == 0 { + // Always include a set balance action to make sure + // the state changes are not empty. + actions[i][j] = newStateTestAction(common.HexToAddress("0xdeadbeef"), r, 0) + continue + } + actions[i][j] = newStateTestAction(addrs[r.Intn(len(addrs))], r, -1) + } + } + chunk := int(math.Sqrt(float64(size))) + if size > 0 && chunk == 0 { + chunk = 1 + } + return reflect.ValueOf(&stateTest{ + addrs: addrs, + actions: actions, + chunk: chunk, + }) +} + +func (test *stateTest) String() string { + out := new(bytes.Buffer) + for i, actions := range test.actions { + fmt.Fprintf(out, "---- block %d ----\n", i) + for j, action := range actions { + if j%test.chunk == 0 { + fmt.Fprintf(out, "---- transaction %d ----\n", j/test.chunk) + } + fmt.Fprintf(out, "%4d: %s\n", j%test.chunk, action.name) + } + } + return out.String() +} + +func (test *stateTest) run() bool { + var ( + roots []common.Hash + accountList []map[common.Hash][]byte + storageList []map[common.Hash]map[common.Hash][]byte + onCommit = func(states *triestate.Set) { + accountList = append(accountList, copySet(states.Accounts)) + storageList = append(storageList, copy2DSet(states.Storages)) + } + disk = rawdb.NewMemoryDatabase() + tdb = trie.NewDatabaseWithConfig(disk, &trie.Config{OnCommit: onCommit}) + sdb = NewDatabaseWithNodeDB(disk, tdb) + byzantium = rand.Intn(2) == 0 + ) + for i, actions := range test.actions { + root := types.EmptyRootHash + if i != 0 { + root = roots[len(roots)-1] + } + state, err := New(root, sdb, nil) + if err != nil { + panic(err) + } + for i, action := range actions { + if i%test.chunk == 0 && i != 0 { + if byzantium { + state.Finalise(true) // call finalise at the transaction boundary + } else { + state.IntermediateRoot(true) // call intermediateRoot at the transaction boundary + } + } + action.fn(action, state) + } + if byzantium { + state.Finalise(true) // call finalise at the transaction boundary + } else { + state.IntermediateRoot(true) // call intermediateRoot at the transaction boundary + } + nroot, err := state.Commit(true) // call commit at the block boundary + if err != nil { + panic(err) + } + if nroot == root { + return true // filter out non-change state transition + } + roots = append(roots, nroot) + } + for i := 0; i < len(test.actions); i++ { + root := types.EmptyRootHash + if i != 0 { + root = roots[i-1] + } + test.err = test.verify(root, roots[i], tdb, accountList[i], storageList[i]) + if test.err != nil { + return false + } + } + return true +} + +// verifyAccountCreation this function is called once the state diff says that +// specific account was not present. A serial of checks will be performed to +// ensure the state diff is correct, includes: +// +// - the account was indeed not present in trie +// - the account is present in new trie, nil->nil is regarded as invalid +// - the slots transition is correct +func (test *stateTest) verifyAccountCreation(next common.Hash, db *trie.Database, otr, ntr *trie.Trie, addrHash common.Hash, slots map[common.Hash][]byte) error { + // Verify account change + oBlob := otr.Get(addrHash.Bytes()) + nBlob := ntr.Get(addrHash.Bytes()) + if len(oBlob) != 0 { + return fmt.Errorf("unexpected account in old trie, %x", addrHash) + } + if len(nBlob) == 0 { + return fmt.Errorf("missing account in new trie, %x", addrHash) + } + + // Verify storage changes + var nAcct types.StateAccount + if err := rlp.DecodeBytes(nBlob, &nAcct); err != nil { + return err + } + // Account has no slot, empty slot set is expected + if nAcct.Root == types.EmptyRootHash { + if len(slots) != 0 { + return fmt.Errorf("unexpected slot changes %x", addrHash) + } + return nil + } + // Account has slots, ensure all new slots are contained + st, err := trie.New(trie.StorageTrieID(next, addrHash, nAcct.Root), db) + if err != nil { + return err + } + for key, val := range slots { + st.Update(key.Bytes(), val) + } + if st.Hash() != types.EmptyRootHash { + return errors.New("invalid slot changes") + } + return nil +} + +// verifyAccountUpdate this function is called once the state diff says that +// specific account was present. A serial of checks will be performed to +// ensure the state diff is correct, includes: +// +// - the account was indeed present in trie +// - the account in old trie matches the provided value +// - the slots transition is correct +func (test *stateTest) verifyAccountUpdate(next common.Hash, db *trie.Database, otr, ntr *trie.Trie, addrHash common.Hash, origin []byte, slots map[common.Hash][]byte) error { + // Verify account change + oBlob := otr.Get(addrHash.Bytes()) + nBlob := ntr.Get(addrHash.Bytes()) + if len(oBlob) == 0 { + return fmt.Errorf("missing account in old trie, %x", addrHash) + } + full, err := snapshot.FullAccountRLP(origin) + if err != nil { + return err + } + if !bytes.Equal(full, oBlob) { + return fmt.Errorf("account value is not matched, %x", addrHash) + } + + // Decode accounts + var ( + oAcct types.StateAccount + nAcct types.StateAccount + nRoot common.Hash + ) + if err := rlp.DecodeBytes(oBlob, &oAcct); err != nil { + return err + } + if len(nBlob) == 0 { + nRoot = types.EmptyRootHash + } else { + if err := rlp.DecodeBytes(nBlob, &nAcct); err != nil { + return err + } + nRoot = nAcct.Root + } + + // Verify storage + st, err := trie.New(trie.StorageTrieID(next, addrHash, nRoot), db) + if err != nil { + return err + } + for key, val := range slots { + st.Update(key.Bytes(), val) + } + if st.Hash() != oAcct.Root { + return errors.New("invalid slot changes") + } + return nil +} + +func (test *stateTest) verify(root common.Hash, next common.Hash, db *trie.Database, accountsOrigin map[common.Hash][]byte, storagesOrigin map[common.Hash]map[common.Hash][]byte) error { + otr, err := trie.New(trie.StateTrieID(root), db) + if err != nil { + return err + } + ntr, err := trie.New(trie.StateTrieID(next), db) + if err != nil { + return err + } + for addrHash, account := range accountsOrigin { + var err error + if len(account) == 0 { + err = test.verifyAccountCreation(next, db, otr, ntr, addrHash, storagesOrigin[addrHash]) + } else { + err = test.verifyAccountUpdate(next, db, otr, ntr, addrHash, accountsOrigin[addrHash], storagesOrigin[addrHash]) + } + if err != nil { + return err + } + } + return nil +} + +func TestStateChanges(t *testing.T) { + config := &quick.Config{MaxCount: 1000} + err := quick.Check((*stateTest).run, config) + if cerr, ok := err.(*quick.CheckError); ok { + test := cerr.In[0].(*stateTest) + t.Errorf("%v:\n%s", test.err, test) + } else if err != nil { + t.Error(err) + } +} diff --git a/core/types/state_account.go b/core/types/state_account.go index a80a048f1..5853fb58b 100644 --- a/core/types/state_account.go +++ b/core/types/state_account.go @@ -20,8 +20,11 @@ import ( "math/big" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" ) +var emptyCodeHash = crypto.Keccak256(nil) + // StateAccount is the Ethereum consensus representation of accounts. // These objects are stored in the main account trie. type StateAccount struct { @@ -31,6 +34,29 @@ type StateAccount struct { CodeHash []byte } +// NewEmptyStateAccount constructs an empty state account. +func NewEmptyStateAccount() *StateAccount { + return &StateAccount{ + Balance: new(big.Int), + Root: EmptyRootHash, + CodeHash: emptyCodeHash, + } +} + +// Copy returns a deep-copied state account object. +func (acct *StateAccount) Copy() *StateAccount { + var balance *big.Int + if acct.Balance != nil { + balance = new(big.Int).Set(acct.Balance) + } + return &StateAccount{ + Nonce: acct.Nonce, + Balance: balance, + Root: acct.Root, + CodeHash: common.CopyBytes(acct.CodeHash), + } +} + type DirtyStateAccount struct { Address common.Address `json:"address"` Nonce uint64 `json:"nonce"`