Skip to content

Commit

Permalink
feat(testplans): add memory snapshots to stress test
Browse files Browse the repository at this point in the history
Add an option to record heap profiles, taken before and after GC, at the end of every round
  • Loading branch information
hannahhoward committed Jan 28, 2021
1 parent 061d668 commit 9209d78
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 15 deletions.
3 changes: 2 additions & 1 deletion testplans/graphsync/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/ipfs/go-blockservice v0.1.3
github.com/ipfs/go-cid v0.0.6
github.com/ipfs/go-datastore v0.4.4
github.com/ipfs/go-ds-badger v0.2.3
github.com/ipfs/go-graphsync v0.1.2
github.com/ipfs/go-ipfs-blockstore v0.1.4
github.com/ipfs/go-ipfs-chunker v0.0.5
Expand All @@ -30,4 +31,4 @@ require (
google.golang.org/protobuf v1.25.0 // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.2.8 // indirect
)
)
4 changes: 4 additions & 0 deletions testplans/graphsync/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dmitri.shuralyov.com/service/change v0.0.0-20181023043359-a85b471d5412/go.mod h1
dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D6DFvNNtx+9ybjezNCa8XF0xaYcETyp6rHWU=
git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg=
github.com/AndreasBriese/bbloom v0.0.0-20180913140656-343706a395b7/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9 h1:HD8gA2tkByhMAwYaFAX9w2l7vxvBQ5NMoxDrkhqhtn4=
github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Kubuxu/go-os-helper v0.0.1/go.mod h1:N8B+I7vPCT80IcP58r50u4+gEEcsZETFUpAzWW2ep1Y=
Expand Down Expand Up @@ -69,7 +70,9 @@ github.com/davidlazar/go-crypto v0.0.0-20190912175916-7055855a373f/go.mod h1:rQY
github.com/dgraph-io/badger v1.5.5-0.20190226225317-8115aed38f8f/go.mod h1:VZxzAIRPHRVNRKRo6AXrX9BJegn6il06VMTZVJYCIjQ=
github.com/dgraph-io/badger v1.6.0-rc1/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4=
github.com/dgraph-io/badger v1.6.1 h1:w9pSFNSdq/JPM1N12Fz/F/bzo993Is1W+Q7HjPzi7yg=
github.com/dgraph-io/badger v1.6.1/go.mod h1:FRmFw3uxvcpa8zG3Rxs0th+hCLIuaQg8HlNV5bjgnuU=
github.com/dgraph-io/ristretto v0.0.2 h1:a5WaUrDa0qm0YrAAS1tUykT5El3kt62KNZZeMxQn3po=
github.com/dgraph-io/ristretto v0.0.2/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgryski/go-farm v0.0.0-20190104051053-3adb47b1fb0f/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
Expand Down Expand Up @@ -198,6 +201,7 @@ github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46U
github.com/ipfs/go-ds-badger v0.0.2/go.mod h1:Y3QpeSFWQf6MopLTiZD+VT6IC1yZqaGmjvRcKeSGij8=
github.com/ipfs/go-ds-badger v0.0.5/go.mod h1:g5AuuCGmr7efyzQhLL8MzwqcauPojGPUaHzfGTzuE3s=
github.com/ipfs/go-ds-badger v0.2.1/go.mod h1:Tx7l3aTph3FMFrRS838dcSJh+jjA7cX9DrGVwx/NOwE=
github.com/ipfs/go-ds-badger v0.2.3 h1:J27YvAcpuA5IvZUbeBxOcQgqnYHUPxoygc6QxxkodZ4=
github.com/ipfs/go-ds-badger v0.2.3/go.mod h1:pEYw0rgg3FIrywKKnL+Snr+w/LjJZVMTBRn4FS6UHUk=
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=
github.com/ipfs/go-ds-leveldb v0.4.1/go.mod h1:jpbku/YqBSsBc1qgME8BkWS4AxzF2cEu1Ii2r79Hh9s=
Expand Down
78 changes: 64 additions & 14 deletions testplans/graphsync/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os"
"path/filepath"
goruntime "runtime"
"runtime/pprof"
"strings"
"time"

Expand Down Expand Up @@ -65,10 +66,10 @@ func (p networkParams) String() string {

func runStress(runenv *runtime.RunEnv, initCtx *run.InitContext) error {
var (
size = runenv.SizeParam("size")
concurrency = runenv.IntParam("concurrency")

networkParams = parseNetworkConfig(runenv)
size = runenv.SizeParam("size")
concurrency = runenv.IntParam("concurrency")
memorySnapshots = runenv.BooleanParam("memory_snapshots")
networkParams = parseNetworkConfig(runenv)
)
runenv.RecordMessage("started test instance")
runenv.RecordMessage("network params: %v", networkParams)
Expand Down Expand Up @@ -107,7 +108,7 @@ func runStress(runenv *runtime.RunEnv, initCtx *run.InitContext) error {
hookActions.ValidateRequest()
})

return runProvider(ctx, runenv, initCtx, dagsrv, size, networkParams, concurrency)
return runProvider(ctx, runenv, initCtx, dagsrv, size, networkParams, concurrency, memorySnapshots)

case "requestors":
runenv.RecordMessage("we are the requestor")
Expand All @@ -118,7 +119,7 @@ func runStress(runenv *runtime.RunEnv, initCtx *run.InitContext) error {
return err
}
runenv.RecordMessage("done dialling provider")
return runRequestor(ctx, runenv, initCtx, gsync, p, dagsrv, networkParams, concurrency, size)
return runRequestor(ctx, runenv, initCtx, gsync, p, dagsrv, networkParams, concurrency, size, memorySnapshots)

default:
panic(fmt.Sprintf("unsupported group ID: %s\n", runenv.TestGroupID))
Expand Down Expand Up @@ -158,7 +159,7 @@ func parseNetworkConfig(runenv *runtime.RunEnv) []networkParams {
return ret
}

func runRequestor(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.InitContext, gsync gs.GraphExchange, p peer.AddrInfo, dagsrv format.DAGService, networkParams []networkParams, concurrency int, size uint64) error {
func runRequestor(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.InitContext, gsync gs.GraphExchange, p peer.AddrInfo, dagsrv format.DAGService, networkParams []networkParams, concurrency int, size uint64, memorySnapshots bool) error {
var (
cids []cid.Cid
// create a selector for the whole UnixFS dag
Expand All @@ -167,9 +168,10 @@ func runRequestor(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.Init

for round, np := range networkParams {
var (
topicCid = sync.NewTopic(fmt.Sprintf("cid-%d", round), []cid.Cid{})
stateNext = sync.State(fmt.Sprintf("next-%d", round))
stateNet = sync.State(fmt.Sprintf("network-configured-%d", round))
topicCid = sync.NewTopic(fmt.Sprintf("cid-%d", round), []cid.Cid{})
stateNext = sync.State(fmt.Sprintf("next-%d", round))
stateNet = sync.State(fmt.Sprintf("network-configured-%d", round))
stateFinish = sync.State(fmt.Sprintf("finish-%d", round))
)

// wait for all instances to be ready for the next state.
Expand Down Expand Up @@ -233,22 +235,30 @@ func runRequestor(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.Init
if err := errgrp.Wait(); err != nil {
return err
}

// wait for all instances to finish running
initCtx.SyncClient.MustSignalAndWait(ctx, stateFinish, runenv.TestInstanceCount)

if memorySnapshots {
recordSnapshots(runenv, size, np, concurrency)
}
}

return nil
}

func runProvider(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.InitContext, dagsrv format.DAGService, size uint64, networkParams []networkParams, concurrency int) error {
func runProvider(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.InitContext, dagsrv format.DAGService, size uint64, networkParams []networkParams, concurrency int, memorySnapshots bool) error {
var (
cids []cid.Cid
bufferedDS = format.NewBufferedDAG(ctx, dagsrv)
)

for round, np := range networkParams {
var (
topicCid = sync.NewTopic(fmt.Sprintf("cid-%d", round), []cid.Cid{})
stateNext = sync.State(fmt.Sprintf("next-%d", round))
stateNet = sync.State(fmt.Sprintf("network-configured-%d", round))
topicCid = sync.NewTopic(fmt.Sprintf("cid-%d", round), []cid.Cid{})
stateNext = sync.State(fmt.Sprintf("next-%d", round))
stateFinish = sync.State(fmt.Sprintf("finish-%d", round))
stateNet = sync.State(fmt.Sprintf("network-configured-%d", round))
)

// wait for all instances to be ready for the next state.
Expand Down Expand Up @@ -314,6 +324,13 @@ func runProvider(ctx context.Context, runenv *runtime.RunEnv, initCtx *run.InitC
CallbackTarget: 1,
})
runenv.RecordMessage("\tnetwork configured for round %d", round)

// wait for all instances to finish running
initCtx.SyncClient.MustSignalAndWait(ctx, stateFinish, runenv.TestInstanceCount)

if memorySnapshots {
recordSnapshots(runenv, size, np, concurrency)
}
}

return nil
Expand Down Expand Up @@ -414,3 +431,36 @@ func createDatastore(diskStore bool) ds.Datastore {

return datastore
}

// recordSnapshots writes two heap-profile assets for the current round: one
// capturing the heap as-is ("pre-gc") and one taken after forcing garbage
// collection ("post-gc"). Comparing the pair shows how much of the round's
// memory is actually reclaimable versus still referenced.
func recordSnapshots(runenv *runtime.RunEnv, size uint64, np networkParams, concurrency int) error {
	runenv.RecordMessage("Recording heap profile...")
	if err := writeHeap(runenv, size, np, concurrency, "pre-gc"); err != nil {
		return fmt.Errorf("writing pre-gc heap profile: %w", err)
	}
	// GC twice: the second cycle can reclaim objects released by finalizers
	// that ran during the first, yielding a more stable post-GC snapshot.
	goruntime.GC()
	goruntime.GC()
	if err := writeHeap(runenv, size, np, concurrency, "post-gc"); err != nil {
		return fmt.Errorf("writing post-gc heap profile: %w", err)
	}
	return nil
}

// writeHeap dumps the current heap profile into a raw test asset whose name
// encodes the round parameters (latency, bandwidth, concurrency, transfer
// size) plus a caller-supplied postfix such as "pre-gc" or "post-gc".
func writeHeap(runenv *runtime.RunEnv, size uint64, np networkParams, concurrency int, postfix string) error {
	snapshotName := fmt.Sprintf("heap_lat-%s_bw-%s_concurrency-%d_size-%s_%s", np.latency, humanize.IBytes(np.bandwidth), concurrency, humanize.Bytes(size), postfix)
	// humanize output contains spaces (e.g. "10 MB"); strip them so the
	// asset name is a single filesystem-friendly token.
	snapshotName = strings.ReplaceAll(snapshotName, " ", "")
	snapshotFile, err := runenv.CreateRawAsset(snapshotName)
	if err != nil {
		return fmt.Errorf("creating raw asset %q: %w", snapshotName, err)
	}
	if err := pprof.WriteHeapProfile(snapshotFile); err != nil {
		// Close before returning so the handle is not leaked on this error
		// path (the write error is the one worth reporting).
		snapshotFile.Close()
		return fmt.Errorf("writing heap profile to %q: %w", snapshotName, err)
	}
	if err := snapshotFile.Close(); err != nil {
		return fmt.Errorf("closing heap profile %q: %w", snapshotName, err)
	}
	return nil
}
1 change: 1 addition & 0 deletions testplans/graphsync/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ chunk_size = { type = "int", desc = "unixfs chunk size (power of 2)", default =
links_per_level = { type = "int", desc = "unixfs links per level", default = "1024" }
raw_leaves = { type = "bool", desc = "should unixfs leaves be left unwrapped", default = "true"}
disk_store = { type = "bool", desc = "should data be stored on disk (true) or memory (false)", default = "false"}
memory_snapshots = { type = "bool", desc = "should record heap dumps of memory performance", default = "false" }
5 changes: 5 additions & 0 deletions testplans/graphsync/stress-k8s.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ size = "10MB"
latencies = '["50ms", "100ms", "200ms"]'
bandwidths = '["32MiB", "16MiB", "8MiB", "4MiB", "1MiB"]'
concurrency = "10"
chunk_size = "20"
links_per_level = "1024"
raw_leaves = "true"
disk_store = "true"
memory_snapshots = "true"

[[groups]]
id = "providers"
Expand Down
1 change: 1 addition & 0 deletions testplans/graphsync/stress.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ chunk_size = "20"
links_per_level = "1024"
raw_leaves = "true"
disk_store = "true"
memory_snapshots = "true"

[[groups]]
id = "providers"
Expand Down

0 comments on commit 9209d78

Please sign in to comment.