From a29bef1a0037483de5ee089050d80694ee3cfd56 Mon Sep 17 00:00:00 2001 From: Dean Allen Date: Thu, 11 Aug 2022 16:02:36 -0700 Subject: [PATCH 1/5] amortize sorting of cache --- store/cachekv/store.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 1e7b74ff93a7..ec03bb0c61c3 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -2,6 +2,7 @@ package cachekv import ( "bytes" + "github.com/tendermint/tendermint/libs/math" "io" "sort" "sync" @@ -280,6 +281,7 @@ const ( // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { + const THRESHOLD = 1024 startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end) if startStr > endStr { // Nothing to do here. @@ -294,7 +296,7 @@ func (store *Store) dirtyItems(start, end []byte) { // O(N^2) overhead. // Even without that, too many range checks eventually becomes more expensive // than just not having the cache. 
- if n < 1024 { + if n < THRESHOLD { for key := range store.unsortedCache { if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { cacheValue := store.cache[key] @@ -325,6 +327,15 @@ func (store *Store) dirtyItems(start, end []byte) { startIndex = 0 } + // since we took the time to sort the cache, we should use that effort + // we store at least THRESHOLD values -- cost of storing all is amortized across multiple calls + if endIndex-startIndex < THRESHOLD { + endIndex = math.MinInt(startIndex+THRESHOLD, len(strL)-1) + if endIndex-startIndex < THRESHOLD { + startIndex = math.MaxInt(endIndex-THRESHOLD, 0) + } + } + kvL := make([]*kv.Pair, 0) for i := startIndex; i <= endIndex; i++ { key := strL[i] From 5662d0cadf1b549ed31ef747fd9d78c938369d2b Mon Sep 17 00:00:00 2001 From: Dean Allen Date: Thu, 11 Aug 2022 18:18:24 -0700 Subject: [PATCH 2/5] added benchmark --- store/cachekv/search_benchmark_test.go | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 store/cachekv/search_benchmark_test.go diff --git a/store/cachekv/search_benchmark_test.go b/store/cachekv/search_benchmark_test.go new file mode 100644 index 000000000000..1fd3539cf67c --- /dev/null +++ b/store/cachekv/search_benchmark_test.go @@ -0,0 +1,36 @@ +package cachekv + +import ( + db "github.com/tendermint/tm-db" + "strconv" + "testing" +) + +func BenchmarkLargeUnsortedMisses(b *testing.B) { + for i := 0; i < b.N; i++ { + cache := map[string]*cValue{} + unsorted := map[string]struct{}{} + for i := 0; i < 100_000; i++ { + key := "A" + strconv.Itoa(i) + unsorted[key] = struct{}{} + cache[key] = &cValue{} + } + + for i := 0; i < 100_000; i++ { + key := "Z" + strconv.Itoa(i) + unsorted[key] = struct{}{} + cache[key] = &cValue{} + } + + store := Store{ + cache: cache, + unsortedCache: unsorted, + sortedCache: db.NewMemDB(), + } + for k := 0; k < 10000; k++ { + // cache has A + Z values + // these are within range, but match nothing + store.dirtyItems([]byte("B1"), 
[]byte("B2")) + } + } +} From 3cbb7400f1660f404a87bab10e0bb1a1223bae73 Mon Sep 17 00:00:00 2001 From: Dean Allen Date: Thu, 11 Aug 2022 22:20:01 -0700 Subject: [PATCH 3/5] add changelog, fix lint issue --- CHANGELOG.md | 1 + store/cachekv/store.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6dc151f972e..7428daf808ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * [#12634](https://github.com/cosmos/cosmos-sdk/pull/12634) Move `sdk.Dec` to math package. * [#12596](https://github.com/cosmos/cosmos-sdk/pull/12596) Remove all imports of the non-existent gogo/protobuf v1.3.3 to ease downstream use and go workspaces. * [#12187](https://github.com/cosmos/cosmos-sdk/pull/12187) Add batch operation for x/nft module. +* [#12886](https://github.com/cosmos/cosmos-sdk/pull/12886) Amortize cost of processing cache KV store. ### State Machine Breaking diff --git a/store/cachekv/store.go b/store/cachekv/store.go index ec03bb0c61c3..cacea7e36545 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -2,7 +2,6 @@ package cachekv import ( "bytes" - "github.com/tendermint/tendermint/libs/math" "io" "sort" "sync" @@ -14,6 +13,7 @@ import ( "github.com/cosmos/cosmos-sdk/store/tracekv" "github.com/cosmos/cosmos-sdk/store/types" "github.com/cosmos/cosmos-sdk/types/kv" + "github.com/tendermint/tendermint/libs/math" ) // cValue represents a cached value. 
From 239b0043f897774acdda9023a1d3972cd634a522 Mon Sep 17 00:00:00 2001 From: Dean Allen Date: Mon, 15 Aug 2022 20:27:52 -0700 Subject: [PATCH 4/5] updated benchmark, comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark stats: name old time/op new time/op delta LargeUnsortedMisses-32 21.2s ± 9% 0.0s ± 1% -99.91% (p=0.000 n=20+17) name old alloc/op new alloc/op delta LargeUnsortedMisses-32 1.64GB ± 0% 0.00GB ± 0% -99.83% (p=0.000 n=19+19) name old allocs/op new allocs/op delta LargeUnsortedMisses-32 20.0k ± 0% 41.1k ± 0% +105.23% (p=0.000 n=19+20) --- store/cachekv/search_benchmark_test.go | 43 +++++++++++++++----------- store/cachekv/store.go | 6 ++-- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/store/cachekv/search_benchmark_test.go b/store/cachekv/search_benchmark_test.go index 1fd3539cf67c..d7f1dcb8d4f1 100644 --- a/store/cachekv/search_benchmark_test.go +++ b/store/cachekv/search_benchmark_test.go @@ -8,25 +8,10 @@ import ( func BenchmarkLargeUnsortedMisses(b *testing.B) { for i := 0; i < b.N; i++ { - cache := map[string]*cValue{} - unsorted := map[string]struct{}{} - for i := 0; i < 100_000; i++ { - key := "A" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache[key] = &cValue{} - } - - for i := 0; i < 100_000; i++ { - key := "Z" + strconv.Itoa(i) - unsorted[key] = struct{}{} - cache[key] = &cValue{} - } + b.StopTimer() + store := generateStore() + b.StartTimer() - store := Store{ - cache: cache, - unsortedCache: unsorted, - sortedCache: db.NewMemDB(), - } for k := 0; k < 10000; k++ { // cache has A + Z values // these are within range, but match nothing @@ -34,3 +19,25 @@ func BenchmarkLargeUnsortedMisses(b *testing.B) { } } } + +func generateStore() *Store { + cache := map[string]*cValue{} + unsorted := map[string]struct{}{} + for i := 0; i < 5000; i++ { + key := "A" + strconv.Itoa(i) + unsorted[key] = struct{}{} + cache[key] = &cValue{} + } + + for i := 0; i < 5000; i++ { + key := 
"Z" + strconv.Itoa(i) + unsorted[key] = struct{}{} + cache[key] = &cValue{} + } + + return &Store{ + cache: cache, + unsortedCache: unsorted, + sortedCache: db.NewMemDB(), + } +} diff --git a/store/cachekv/store.go b/store/cachekv/store.go index cacea7e36545..071f07642f99 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -327,8 +327,10 @@ func (store *Store) dirtyItems(start, end []byte) { startIndex = 0 } - // since we took the time to sort the cache, we should use that effort - // we store at least THRESHOLD values -- cost of storing all is amortized across multiple calls + // Since we spent cycles to sort the values, we should process and remove a reasonable amount + // ensure start to end is at least THRESHOLD in size + // if below THRESHOLD, expand it to cover additional values + // this amortizes the cost of processing elements across multiple calls if endIndex-startIndex < THRESHOLD { endIndex = math.MinInt(startIndex+THRESHOLD, len(strL)-1) if endIndex-startIndex < THRESHOLD { From c76c3a6808da007ee7131672b6536e56987d8dd1 Mon Sep 17 00:00:00 2001 From: Dean Allen Date: Wed, 17 Aug 2022 10:18:32 -0700 Subject: [PATCH 5/5] THRESHOLD moved, renamed --- store/cachekv/store.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/store/cachekv/store.go b/store/cachekv/store.go index 071f07642f99..e2f7d2d20489 100644 --- a/store/cachekv/store.go +++ b/store/cachekv/store.go @@ -279,9 +279,10 @@ const ( stateAlreadySorted ) +const minSortSize = 1024 + // Constructs a slice of dirty items, to use w/ memIterator. func (store *Store) dirtyItems(start, end []byte) { - const THRESHOLD = 1024 startStr, endStr := conv.UnsafeBytesToStr(start), conv.UnsafeBytesToStr(end) if startStr > endStr { // Nothing to do here. @@ -296,7 +297,7 @@ func (store *Store) dirtyItems(start, end []byte) { // O(N^2) overhead. // Even without that, too many range checks eventually becomes more expensive // than just not having the cache. 
- if n < THRESHOLD { + if n < minSortSize { for key := range store.unsortedCache { if dbm.IsKeyInDomain(conv.UnsafeStrToBytes(key), start, end) { cacheValue := store.cache[key] @@ -328,13 +329,13 @@ func (store *Store) dirtyItems(start, end []byte) { } // Since we spent cycles to sort the values, we should process and remove a reasonable amount - // ensure start to end is at least THRESHOLD in size - // if below THRESHOLD, expand it to cover additional values + // ensure start to end is at least minSortSize in size + // if below minSortSize, expand it to cover additional values // this amortizes the cost of processing elements across multiple calls - if endIndex-startIndex < THRESHOLD { - endIndex = math.MinInt(startIndex+THRESHOLD, len(strL)-1) - if endIndex-startIndex < THRESHOLD { - startIndex = math.MaxInt(endIndex-THRESHOLD, 0) + if endIndex-startIndex < minSortSize { + endIndex = math.MinInt(startIndex+minSortSize, len(strL)-1) + if endIndex-startIndex < minSortSize { + startIndex = math.MaxInt(endIndex-minSortSize, 0) } }