This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

Fix stream race tests #1208

Closed
wants to merge 17 commits
Changes from 10 commits
17 commits
4d65aea
swarm/network/stream: newStreamerTester cleanup only if err is nil
janos Feb 8, 2019
73d5c97
swarm/network/stream: raise newStreamerTester waitForPeers timeout
janos Feb 8, 2019
d233560
swarm/network/stream: fix data races in GetPeerSubscriptions
janos Feb 8, 2019
edfee9c
swarm/storage: prevent data race on LDBStore.batchesC
janos Feb 8, 2019
91f8735
swarm/network/stream: fix TestGetSubscriptionsRPC data race
janos Feb 8, 2019
11d9441
swarm/network/stream: correctly use Simulation.Run callback
janos Feb 8, 2019
e411147
swarm/network: protect addrCountC in Kademlia.AddrCountC function
janos Feb 11, 2019
080f4c2
p2p/simulations: fix a deadlock calling getRandomNode with lock
janos Feb 11, 2019
b151dc2
swarm/network/stream: terminate disconnect goroutines in tests
janos Feb 11, 2019
4f5807b
swarm/network/stream: reduce memory consumption when testing data races
janos Feb 11, 2019
dee2145
swarm/network/stream: add watchDisconnections helper function
janos Feb 12, 2019
c3f2368
swarm/network/stream: add concurrent counter for tests
janos Feb 12, 2019
baed029
swarm/network/stream: rename race/norace test files and use const
janos Feb 12, 2019
1467d2b
swarm/network/stream: remove watchSim and its panic
janos Feb 12, 2019
3710a72
swarm/network/stream: pass context in watchDisconnections
janos Feb 12, 2019
714d871
swarm/network/stream: add concurrent safe bool for watchDisconnections
janos Feb 12, 2019
d8f29cf
swarm/storage: fix LDBStore.batchesC data race by not closing it
janos Feb 13, 2019
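The last commit listed, d8f29cf, has no hunk in this changes-from-10-commits view, but its message describes the whole fix: stop closing LDBStore.batchesC. That follows the general Go rule that a channel which still has concurrent senders must never be closed, because a send on a closed channel panics and the close itself races with the sends. Below is a minimal sketch of the usual alternative, leaving the data channel open and signalling shutdown on a separate quit channel; the worker type is illustrative, not the actual LDBStore.

package main

import (
	"fmt"
	"sync"
)

// worker mimics the shape of the problem: many goroutines may submit batches,
// so the batches channel is never closed; shutdown is signalled via quit,
// which is closed exactly once.
type worker struct {
	batches chan int
	quit    chan struct{}
	wg      sync.WaitGroup
}

func newWorker() *worker {
	w := &worker{batches: make(chan int, 8), quit: make(chan struct{})}
	w.wg.Add(1)
	go w.loop()
	return w
}

func (w *worker) loop() {
	defer w.wg.Done()
	for {
		select {
		case b := <-w.batches:
			fmt.Println("processing batch", b)
		case <-w.quit:
			return
		}
	}
}

// submit never closes batches, so concurrent senders cannot panic on a closed
// channel; it also bails out once quit is closed.
func (w *worker) submit(b int) {
	select {
	case w.batches <- b:
	case <-w.quit:
	}
}

// stop may return before buffered batches are drained, which is fine for this
// sketch.
func (w *worker) stop() {
	close(w.quit)
	w.wg.Wait()
}

func main() {
	w := newWorker()
	w.submit(1)
	w.submit(2)
	w.stop()
}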
2 changes: 1 addition & 1 deletion p2p/simulations/network.go
@@ -461,7 +461,7 @@ func (net *Network) getRandomNode(ids []enode.ID, excludeIDs []enode.ID) *Node {
if l == 0 {
return nil
}
return net.GetNode(filtered[rand.Intn(l)])
return net.getNode(filtered[rand.Intn(l)])
}

func filterIDs(ids []enode.ID, excludeIDs []enode.ID) []enode.ID {
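This one-word change is the whole deadlock fix from commit 080f4c2: getRandomNode is reached with the network lock already held, and the exported GetNode acquires that same lock again, so the call never returns; the unexported getNode assumes the caller holds the lock. A minimal sketch of this exported-locking / unexported-lockless pairing follows; the type and methods are illustrative, not the real p2p/simulations Network.

package main

import (
	"fmt"
	"sync"
)

// index follows the locking convention behind the fix: exported methods take
// the mutex, unexported ones assume the caller already holds it.
type index struct {
	mu    sync.Mutex
	nodes map[string]int
}

// Get is the exported, self-locking accessor.
func (x *index) Get(id string) int {
	x.mu.Lock()
	defer x.mu.Unlock()
	return x.get(id)
}

// get assumes x.mu is already held.
func (x *index) get(id string) int {
	return x.nodes[id]
}

// PickFirst already holds the lock, so it must call get; calling Get here
// would try to lock x.mu a second time and block forever, since sync.Mutex is
// not reentrant.
func (x *index) PickFirst(id string) int {
	x.mu.Lock()
	defer x.mu.Unlock()
	return x.get(id)
}

func main() {
	x := &index{nodes: map[string]int{"a": 1}}
	fmt.Println(x.PickFirst("a"))
}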
3 changes: 3 additions & 0 deletions swarm/network/kademlia.go
@@ -353,6 +353,9 @@ func (k *Kademlia) sendNeighbourhoodDepthChange() {
// Not receiving from the returned channel will block Register function
// when address count value changes.
func (k *Kademlia) AddrCountC() <-chan int {
k.lock.Lock()
defer k.lock.Unlock()

if k.addrCountC == nil {
k.addrCountC = make(chan int)
}
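Commit e411147 wraps the lazy creation of addrCountC in the Kademlia lock: without it, two goroutines calling AddrCountC at the same time can both observe a nil channel and create two different ones, and the unsynchronized write to k.addrCountC races with the code that later sends on it. A small self-contained sketch of the same lazily-initialized, lock-guarded channel, with a simplified type rather than the real Kademlia:

package main

import (
	"fmt"
	"sync"
)

// notifier hands out a lazily created update channel, the way AddrCountC does
// after this change.
type notifier struct {
	mu      sync.Mutex
	updates chan int
}

// UpdatesC creates the channel on first use; the mutex makes the nil check and
// the assignment atomic with respect to other callers, so everyone gets the
// same channel.
func (n *notifier) UpdatesC() <-chan int {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.updates == nil {
		n.updates = make(chan int)
	}
	return n.updates
}

// publish reads the field under the same lock and drops the value if there is
// no subscriber or no ready receiver.
func (n *notifier) publish(v int) {
	n.mu.Lock()
	c := n.updates
	n.mu.Unlock()
	if c == nil {
		return
	}
	select {
	case c <- v:
	default:
	}
}

func main() {
	n := &notifier{}
	// Concurrent callers all end up with the same channel.
	var wg sync.WaitGroup
	results := make([]<-chan int, 4)
	for i := range results {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			results[i] = n.UpdatesC()
		}(i)
	}
	wg.Wait()
	n.publish(7) // safe even though nobody is receiving yet
	fmt.Println(results[0] == results[1], results[1] == results[2], results[2] == results[3])
}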
11 changes: 7 additions & 4 deletions swarm/network/stream/common_test.go
@@ -151,7 +151,7 @@ func newStreamerTester(registryOptions *RegistryOptions) (*p2ptest.ProtocolTeste
// temp datadir
datadir, err := ioutil.TempDir("", "streamer")
if err != nil {
return nil, nil, nil, func() {}, err
return nil, nil, nil, nil, err
}
removeDataDir := func() {
os.RemoveAll(datadir)
@@ -163,12 +163,14 @@ func newStreamerTester(registryOptions *RegistryOptions) (*p2ptest.ProtocolTeste

localStore, err := storage.NewTestLocalStoreForAddr(params)
if err != nil {
return nil, nil, nil, removeDataDir, err
removeDataDir()
return nil, nil, nil, nil, err
}

netStore, err := storage.NewNetStore(localStore, nil)
if err != nil {
return nil, nil, nil, removeDataDir, err
removeDataDir()
return nil, nil, nil, nil, err
}

delivery := NewDelivery(to, netStore)
@@ -180,8 +182,9 @@ func newStreamerTester(registryOptions *RegistryOptions) (*p2ptest.ProtocolTeste
}
protocolTester := p2ptest.NewProtocolTester(addr.ID(), 1, streamer.runProtocol)

err = waitForPeers(streamer, 1*time.Second, 1)
err = waitForPeers(streamer, 10*time.Second, 1)
if err != nil {
teardown()
return nil, nil, nil, nil, errors.New("timeout: peer is not created")
}

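Taken together, the common_test.go changes move newStreamerTester to a common Go helper convention: on failure the helper releases whatever it has already allocated (removeDataDir, teardown) and returns a nil cleanup function, so callers must check the error before deferring the teardown, which is exactly the reordering applied in the test files below. A hedged sketch of that calling convention, with an illustrative helper rather than the real newStreamerTester signature:

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

// newTempEnv allocates a temp dir and returns a teardown that removes it.
// On failure it cleans up after itself and returns a nil teardown, mirroring
// the newStreamerTester change in this PR.
func newTempEnv() (dir string, teardown func(), err error) {
	dir, err = ioutil.TempDir("", "env")
	if err != nil {
		return "", nil, err
	}
	cfg := filepath.Join(dir, "config")
	if err := ioutil.WriteFile(cfg, []byte("{}"), 0600); err != nil {
		os.RemoveAll(dir) // partial setup is released here, not by the caller
		return "", nil, err
	}
	return dir, func() { os.RemoveAll(dir) }, nil
}

func main() {
	dir, teardown, err := newTempEnv()
	if err != nil { // check the error first...
		fmt.Println("setup failed:", err)
		return
	}
	defer teardown() // ...because teardown is nil when err != nil
	fmt.Println("working in", dir)
}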
55 changes: 34 additions & 21 deletions swarm/network/stream/delivery_test.go
@@ -48,10 +48,10 @@ func TestStreamerRetrieveRequest(t *testing.T) {
Syncing: SyncingDisabled,
}
tester, streamer, _, teardown, err := newStreamerTester(regOpts)
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -100,10 +100,10 @@ func TestStreamerUpstreamRetrieveRequestMsgExchangeWithoutStore(t *testing.T) {
Retrieval: RetrievalEnabled,
Syncing: SyncingDisabled, //do no syncing
})
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -172,10 +172,10 @@ func TestStreamerUpstreamRetrieveRequestMsgExchange(t *testing.T) {
Retrieval: RetrievalEnabled,
Syncing: SyncingDisabled,
})
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -362,10 +362,10 @@ func TestStreamerDownstreamChunkDeliveryMsgExchange(t *testing.T) {
Retrieval: RetrievalDisabled,
Syncing: SyncingDisabled,
})
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

streamer.RegisterClientFunc("foo", func(p *Peer, t string, live bool) (Client, error) {
return &testClient{
@@ -485,7 +485,8 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)
}

log.Info("Starting simulation")
ctx := context.Background()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) (err error) {
nodeIDs := sim.UpNodeIDs()
//determine the pivot node to be the first node of the simulation
@@ -557,9 +558,16 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)

var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
for {
select {
case <-ctx.Done():
return
case d := <-disconnections:
if d.Error != nil {
log.Error("peer drop event error", "node", d.NodeID, "peer", d.PeerID, "err", err)
} else {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
}
disconnected.Store(true)
}
}
@@ -589,7 +597,7 @@ func testDeliveryFromNodes(t *testing.T, nodes, chunkCount int, skipCheck bool)
return fmt.Errorf("Test failed, chunks not available on all nodes")
}
if err := <-retErrC; err != nil {
t.Fatalf("requesting chunks: %v", err)
return fmt.Errorf("requesting chunks: %v", err)
}
log.Debug("Test terminated successfully")
return nil
@@ -657,21 +665,22 @@ func benchmarkDeliveryFromNodes(b *testing.B, nodes, chunkCount int, skipCheck b
b.Fatal(err)
}

ctx := context.Background()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
result := sim.Run(ctx, func(ctx context.Context, sim *simulation.Simulation) (err error) {
nodeIDs := sim.UpNodeIDs()
node := nodeIDs[len(nodeIDs)-1]

item, ok := sim.NodeItem(node, bucketKeyFileStore)
if !ok {
b.Fatal("No filestore")
return errors.New("No filestore")
}
remoteFileStore := item.(*storage.FileStore)

pivotNode := nodeIDs[0]
item, ok = sim.NodeItem(pivotNode, bucketKeyNetStore)
if !ok {
b.Fatal("No filestore")
return errors.New("No filestore")
}
netStore := item.(*storage.NetStore)

@@ -687,9 +696,16 @@ func benchmarkDeliveryFromNodes(b *testing.B, nodes, chunkCount int, skipCheck b

var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
for {
select {
case <-ctx.Done():
return
case d := <-disconnections:
if d.Error != nil {
log.Error("peer drop event error", "node", d.NodeID, "peer", d.PeerID, "err", err)
} else {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
}
disconnected.Store(true)
}
}
@@ -713,12 +729,12 @@ func benchmarkDeliveryFromNodes(b *testing.B, nodes, chunkCount int, skipCheck b
ctx := context.TODO()
hash, wait, err := remoteFileStore.Store(ctx, testutil.RandomReader(i, chunkSize), int64(chunkSize), false)
if err != nil {
b.Fatalf("expected no error. got %v", err)
return fmt.Errorf("store: %v", err)
}
// wait until all chunks stored
err = wait(ctx)
if err != nil {
b.Fatalf("expected no error. got %v", err)
return fmt.Errorf("wait store: %v", err)
}
// collect the hashes
hashes[i] = hash
@@ -754,10 +770,7 @@ func benchmarkDeliveryFromNodes(b *testing.B, nodes, chunkCount int, skipCheck b
break Loop
}
}
if err != nil {
b.Fatal(err)
}
return nil
return err
})
if result.Error != nil {
b.Fatal(result.Error)
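The select-on-ctx.Done loop introduced above, and repeated in the other test files below, is what later commits in this PR fold into a watchDisconnections helper with a concurrency-safe flag (see the commit list). A sketch of what such a helper can look like, assuming a simplified peer-drop event type rather than the actual simulation API:

package main

import (
	"context"
	"fmt"
	"sync/atomic"
	"time"
)

// peerDrop stands in for the simulation's peer event payload.
type peerDrop struct {
	NodeID, PeerID string
	Error          error
}

// boolean is a minimal concurrency-safe flag, similar in spirit to the
// "concurrent safe bool" commit in this PR.
type boolean struct{ v atomic.Value }

func (b *boolean) set(x bool) { b.v.Store(x) }
func (b *boolean) bool() bool { x, _ := b.v.Load().(bool); return x }

// watchDisconnections drains drop events until the context is cancelled,
// setting the flag on the first event; returning on ctx.Done() is what keeps
// the goroutine from outliving the test.
func watchDisconnections(ctx context.Context, drops <-chan peerDrop) *boolean {
	disconnected := new(boolean)
	go func() {
		for {
			select {
			case <-ctx.Done():
				return
			case d := <-drops:
				if d.Error != nil {
					fmt.Println("peer drop event error", d.NodeID, d.PeerID, d.Error)
				} else {
					fmt.Println("peer drop", d.NodeID, d.PeerID)
				}
				disconnected.set(true)
			}
		}
	}()
	return disconnected
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	drops := make(chan peerDrop, 1)
	flag := watchDisconnections(ctx, drops)
	drops <- peerDrop{NodeID: "a", PeerID: "b"}
	time.Sleep(10 * time.Millisecond) // crude wait, good enough for a toy example
	fmt.Println("disconnected:", flag.bool())
}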
19 changes: 12 additions & 7 deletions swarm/network/stream/intervals_test.go
@@ -118,13 +118,11 @@ func testIntervals(t *testing.T, live bool, history *Range, skipCheck bool) {

_, wait, err := fileStore.Store(ctx, testutil.RandomReader(1, size), int64(size), false)
if err != nil {
log.Error("Store error: %v", "err", err)
t.Fatal(err)
return fmt.Errorf("store: %v", err)
}
err = wait(ctx)
if err != nil {
log.Error("Wait error: %v", "err", err)
t.Fatal(err)
return fmt.Errorf("wait store: %v", err)
}

item, ok = sim.NodeItem(checker, bucketKeyRegistry)
@@ -150,9 +148,16 @@

var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
for {
select {
case <-ctx.Done():
return
case d := <-disconnections:
if d.Error != nil {
log.Error("peer drop event error", "node", d.NodeID, "peer", d.PeerID, "err", err)
} else {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
}
disconnected.Store(true)
}
}
8 changes: 4 additions & 4 deletions swarm/network/stream/lightnode_test.go
@@ -29,10 +29,10 @@ func TestLigthnodeRetrieveRequestWithRetrieve(t *testing.T) {
Syncing: SyncingDisabled,
}
tester, _, _, teardown, err := newStreamerTester(registryOptions)
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -68,10 +68,10 @@ func TestLigthnodeRetrieveRequestWithoutRetrieve(t *testing.T) {
Syncing: SyncingDisabled,
}
tester, _, _, teardown, err := newStreamerTester(registryOptions)
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -112,10 +112,10 @@ func TestLigthnodeRequestSubscriptionWithSync(t *testing.T) {
Syncing: SyncingRegisterOnly,
}
tester, _, _, teardown, err := newStreamerTester(registryOptions)
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

@@ -157,10 +157,10 @@ func TestLigthnodeRequestSubscriptionWithoutSync(t *testing.T) {
Syncing: SyncingDisabled,
}
tester, _, _, teardown, err := newStreamerTester(registryOptions)
defer teardown()
if err != nil {
t.Fatal(err)
}
defer teardown()

node := tester.Nodes[0]

41 changes: 37 additions & 4 deletions swarm/network/stream/snapshot_sync_test.go
@@ -18,6 +18,7 @@ package stream
import (
"context"
"fmt"
"io/ioutil"
"os"
"runtime"
"sync"
@@ -92,6 +93,15 @@ func TestSyncingViaGlobalSync(t *testing.T) {
if *longrunning {
chnkCnt = []int{1, 8, 32, 256, 1024}
nodeCnt = []int{16, 32, 64, 128, 256}
} else if raceTest {
Review comment (Member): is this still needed on top of using db vs in memory store?

Reply (janos, Member Author): Yes, it is.

// TestSyncingViaGlobalSync allocates a lot of memory
// with race detector. By reducing the number of chunks
// and nodes, memory consumption is lower and data races
// are still checked, while correctness of syncing is
// tested with more chunks and nodes in regular (!race)
// tests.
chnkCnt = []int{4}
nodeCnt = []int{16}
} else {
//default test
chnkCnt = []int{4, 32}
Expand All @@ -113,7 +123,23 @@ var simServiceMap = map[string]simulation.ServiceFunc{
return nil, nil, err
}

r := NewRegistry(addr.ID(), delivery, netStore, state.NewInmemoryStore(), &RegistryOptions{
var dir string
var store *state.DBStore
if raceTest {
// Use on-disk DBStore to reduce memory consumption in race tests.
dir, err = ioutil.TempDir("", "swarm-stream-")
if err != nil {
return nil, nil, err
}
store, err = state.NewDBStore(dir)
if err != nil {
return nil, nil, err
}
} else {
store = state.NewInmemoryStore()
}

r := NewRegistry(addr.ID(), delivery, netStore, store, &RegistryOptions{
Retrieval: RetrievalDisabled,
Syncing: SyncingAutoSubscribe,
SyncUpdateDelay: 3 * time.Second,
@@ -164,9 +190,16 @@ func testSyncingViaGlobalSync(t *testing.T, chunkCount int, nodeCount int) {

var disconnected atomic.Value
go func() {
for d := range disconnections {
if d.Error != nil {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
for {
select {
case <-ctx.Done():
return
case d := <-disconnections:
if d.Error != nil {
log.Error("peer drop event error", "node", d.NodeID, "peer", d.PeerID, "err", err)
} else {
log.Error("peer drop", "node", d.NodeID, "peer", d.PeerID)
}
disconnected.Store(true)
}
}
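raceTest, used in this file both to shrink the chunk and node counts and to swap the in-memory state store for an on-disk DBStore, is per the commit list a constant defined in renamed race/norace test files. A plausible sketch of how such a constant is usually wired up with build tags follows; the file names are illustrative, and, as the review exchange above notes, the smaller test matrix is still needed on top of the on-disk store.

// race_test.go: compiled only when the race detector is enabled (go test -race)

// +build race

package stream

const raceTest = true

// norace_test.go: compiled in all other builds

// +build !race

package stream

const raceTest = false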
6 changes: 6 additions & 0 deletions swarm/network/stream/stream.go
@@ -938,16 +938,22 @@ It returns a map of node IDs with an array of string representations of Stream o
func (api *API) GetPeerSubscriptions() map[string][]string {
//create the empty map
pstreams := make(map[string][]string)

//iterate all streamer peers
api.streamer.peersMu.RLock()
defer api.streamer.peersMu.RUnlock()

for id, p := range api.streamer.peers {
var streams []string
//every peer has a map of stream servers
//every stream server represents a subscription
p.serverMu.RLock()
for s := range p.servers {
//append the string representation of the stream
//to the list for this peer
streams = append(streams, s.String())
}
p.serverMu.RUnlock()
//set the array of stream servers to the map
pstreams[id.String()] = streams
}
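The GetPeerSubscriptions change reads the registry's peer map and each peer's server map only under their read locks, because Go maps must not be iterated while another goroutine is writing to them. A stripped-down sketch of the same nested read-lock snapshot pattern, with toy types rather than the stream package's real ones:

package main

import (
	"fmt"
	"sync"
)

type peer struct {
	serverMu sync.RWMutex
	servers  map[string]struct{}
}

type hub struct {
	peersMu sync.RWMutex
	peers   map[string]*peer
}

// snapshot copies peer -> subscription names under read locks, so concurrent
// registrations (which take the corresponding write locks) cannot race with
// the iteration.
func (h *hub) snapshot() map[string][]string {
	out := make(map[string][]string)
	h.peersMu.RLock()
	defer h.peersMu.RUnlock()
	for id, p := range h.peers {
		var streams []string
		p.serverMu.RLock()
		for s := range p.servers {
			streams = append(streams, s)
		}
		p.serverMu.RUnlock()
		out[id] = streams
	}
	return out
}

func main() {
	h := &hub{peers: map[string]*peer{
		"node-1": {servers: map[string]struct{}{"SYNC|1": {}}},
	}}
	fmt.Println(h.snapshot())
}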