Skip to content
This repository has been archived by the owner on Aug 2, 2021. It is now read-only.

Robustness #1153

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 70 additions & 21 deletions swarm/network/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type KadParams struct {
MaxProxDisplay int // number of rows the table shows
NeighbourhoodSize int // nearest neighbour core minimum cardinality
MinBinSize int // minimum number of peers in a row
HealthBinSize int // minimum number of peers per bin
MaxBinSize int // maximum number of peers in a row before pruning
RetryInterval int64 // initial interval before a peer is first redialed
RetryExponent int // exponent to multiply retry intervals with
Expand All @@ -71,6 +72,7 @@ func NewKadParams() *KadParams {
MaxProxDisplay: 16,
NeighbourhoodSize: 2,
MinBinSize: 2,
HealthBinSize: 1,
MaxBinSize: 4,
RetryInterval: 4200000000, // 4.2 sec
MaxRetries: 42,
Expand Down Expand Up @@ -715,17 +717,16 @@ func (k *Kademlia) knowNeighbours(addrs [][]byte) (got bool, n int, missing [][]
// then we don't know all our neighbors
// (which sadly is all too common in modern society)
var gots int
var culprits [][]byte
for _, p := range addrs {
pk := common.Bytes2Hex(p)
if pm[pk] {
gots++
} else {
log.Trace(fmt.Sprintf("%08x: known nearest neighbour %s not found", k.base, pk))
culprits = append(culprits, p)
missing = append(missing, p)
}
}
return gots == len(addrs), gots, culprits
return gots == len(addrs), gots, missing
}

// connectedNeighbours tests if all neighbours in the peerpot
Expand All @@ -750,18 +751,49 @@ func (k *Kademlia) connectedNeighbours(peers [][]byte) (got bool, n int, missing
// iterate through nearest neighbors in the peerpot map
// if we can't find the neighbor in the map we created above
// then we don't know all our neighbors
var gots int
var culprits [][]byte
var connects int
for _, p := range peers {
pk := common.Bytes2Hex(p)
if pm[pk] {
gots++
connects++
} else {
log.Trace(fmt.Sprintf("%08x: ExpNN: %s not found", k.base, pk))
culprits = append(culprits, p)
missing = append(missing, p)
}
}
return connects == len(peers), connects, missing
}

// getWeakBins returns the proximity orders of all bins in which the node is
// not sufficiently connected to the peers it knows about.
//
// A bin that contains at least one known peer is reported as weak when the
// node is not connected to every known peer in it AND either
//   - the bin is at or beyond the current depth (po >= depth), or
//   - the bin is shallower than depth and holds fewer than HealthBinSize
//     connections.
//
// Bins in which every known peer is connected are never reported. The result
// is nil when no bin is weak. Bins are listed in the order in which eachAddr
// first reports them, so the output does not depend on map iteration order.
//
// NOTE(review): the only visible caller, GetHealthInfo, holds k.lock for
// reading while calling this; presumably every caller must do so — confirm.
//
// TODO move to separate testing tools file
func (k *Kademlia) getWeakBins() (missing []int) {
	known := make(map[int]int)     // po -> number of known peers in that bin
	connected := make(map[int]int) // po -> number of connected peers in that bin
	var bins []int                 // every po with known peers, in first-seen order

	depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)

	// count the known peers per bin and remember each bin the first time it shows up
	k.eachAddr(nil, 255, func(_ *BzzAddr, po int) bool {
		if known[po] == 0 {
			bins = append(bins, po)
		}
		known[po]++
		return true
	})
	// count the connected peers per bin
	k.eachConn(nil, 255, func(_ *Peer, po int) bool {
		connected[po]++
		return true
	})

	for _, po := range bins {
		conns := connected[po]
		switch {
		case conns == known[po]:
			// connected to everything we know in this bin
		case po >= depth:
			// at or beyond depth every known peer has to be connected
			missing = append(missing, po)
		case conns < k.HealthBinSize:
			// shallower than depth: below the required minimum of connections
			missing = append(missing, po)
		}
	}
	return missing
}

// Health state of the Kademlia
Expand All @@ -774,36 +806,53 @@ type Health struct {
CountConnectNN int // amount of neighbours connected to
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
Saturated bool // whether we are connected to all the peers we would have liked to
Robust bool // whether we are connected to a minimum of peers in all the bins we have known peers in
Hive string
}

// IsHealthyStrict reports whether h satisfies the strict interpretation of health.
// Strict health requires every one of these conditions to hold:
// - we know at least one peer
// - we know all neighbors
// - we are connected to all known neighbors
// - it is robust (we are connected to a minimum of peers in all the bins we have known peers in)
func (h *Health) IsHealthyStrict() bool {
	switch {
	case !h.KnowNN, !h.ConnectNN:
		// some neighbour is unknown or not connected
		return false
	case h.CountKnowNN <= 0:
		// no known neighbour at all
		return false
	}
	return h.Robust
}

// GetHealthInfo reports the health state of the kademlia connectivity
//
// The PeerPot argument provides an all-knowing view of the network
// The resulting Health object is the result of comparing
// the actual composition of the kademlia in question (the receiver) with
// what it SHOULD be when we take everything we know about the network into consideration.
//
// used for testing only
func (k *Kademlia) Healthy(pp *PeerPot) *Health {
func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
k.lock.RLock()
defer k.lock.RUnlock()
if len(pp.NNSet) < k.NeighbourhoodSize {
log.Warn("peerpot NNSet < NeighbourhoodSize")
}
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
connectNN, countConnectNN, missingConnectNN := k.connectedNeighbours(pp.NNSet)
knownNN, countKnownNN, missingKnownNN := k.knowNeighbours(pp.NNSet)
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
saturated := k.saturation() < depth
log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
isSaturated := k.saturation() < depth

weakBins := k.getWeakBins()
isRobust := len(weakBins) == 0

log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, isSaturated: %v, isRobust:%v\n", k.base, knownNN, connectNN, isSaturated, isRobust))

return &Health{
KnowNN: knownn,
CountKnowNN: countknownn,
MissingKnowNN: culpritsknownn,
ConnectNN: gotnn,
CountConnectNN: countgotnn,
MissingConnectNN: culpritsgotnn,
Saturated: saturated,
KnowNN: knownNN,
CountKnowNN: countKnownNN,
MissingKnowNN: missingKnownNN,
ConnectNN: connectNN,
CountConnectNN: countConnectNN,
MissingConnectNN: missingConnectNN,
Saturated: isSaturated,
Robust: isRobust,
Hive: k.string(),
}
}
105 changes: 92 additions & 13 deletions swarm/network/kademlia_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2018 The go-ethereum Authors
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
Expand Down Expand Up @@ -168,10 +168,7 @@ func TestNeighbourhoodDepth(t *testing.T) {
testNum++
}

// TestHealthStrict tests the simplest definition of health
// Which means whether we are connected to all neighbors we know of
func TestHealthStrict(t *testing.T) {

// base address is all zeros
// no peers
// unhealthy (and lonely)
Expand Down Expand Up @@ -199,9 +196,9 @@ func TestHealthStrict(t *testing.T) {
tk.checkHealth(true, false)

// know three peers, connected to the two deepest
// healthy
// unhealthy (not robust)
tk.Register("00000000")
tk.checkHealth(true, false)
tk.checkHealth(false, false)

// know three peers, connected to all three
// healthy
Expand Down Expand Up @@ -229,7 +226,7 @@ func TestHealthStrict(t *testing.T) {
tk.checkHealth(true, false)
}

func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturated bool) {
tk.t.Helper()
kid := common.Bytes2Hex(tk.BaseAddr())
addrs := [][]byte{tk.BaseAddr()}
Expand All @@ -239,18 +236,100 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
})

pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
healthParams := tk.Healthy(pp[kid])
healthParams := tk.GetHealthInfo(pp[kid])

// definition of health, all conditions must be true:
// - we at least know one peer
// - we know all neighbors
// - we are connected to all known neighbors
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0
health := healthParams.IsHealthyStrict()
if expectHealthy != health {
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
}
}

// TestIsRobust walks a test kademlia with base address "11111111" through a
// sequence of Register (peer becomes known) and On (peer becomes connected)
// steps and asserts the Robust flag of the health report after each step.
// Only Robust is asserted here; remarks about general health in the step
// comments are not checked by this test.
func TestIsRobust(t *testing.T) {
tk := newTestKademlia(t, "11111111")
// no peers known at all: expected to be robust
isRobust(t, tk, true)

// know one peer but not connected
// expected: not robust
tk.Register("11100000")
log.Trace(tk.String())
isRobust(t, tk, false)

// know one peer and connected
// expected: robust
tk.On("11100000")
isRobust(t, tk, true)

// know two peers, only one connected
// expected: not robust
tk.Register("11111100")
log.Trace(tk.String())
isRobust(t, tk, false)

// know two peers and connected to both
// expected: robust
tk.On("11111100")
isRobust(t, tk, true)

// know three peers, connected to the two deepest
// expected: not robust
tk.Register("00000000")
log.Trace(tk.String())
isRobust(t, tk, false)

// know three peers, connected to all three
// expected: robust
tk.On("00000000")
isRobust(t, tk, true)

// register "00000000" once more; expected: still robust
// NOTE(review): this is the same address that was connected in the previous
// step, not a new peer in the zero-bin — confirm whether a distinct address
// was intended here
tk.Register("00000000")
log.Trace(tk.String())
isRobust(t, tk, true)

// register two more peers without connecting to either of them
// expected: not robust
tk.Register("10000000")
tk.Register("11000000")
log.Trace(tk.String())
isRobust(t, tk, false)

// connect to the first of the two newly registered peers,
// the second one is still unconnected
// expected: still not robust
tk.On("10000000")
log.Trace(tk.String())
isRobust(t, tk, false)

// connect to the second newly registered peer as well,
// every registered peer is now connected
// expected: robust
tk.On("11000000")
log.Trace(tk.String())
isRobust(t, tk, true)
}

// getHealth builds the health report of k from k's own view of the network.
// It gathers k's base address followed by every address k.EachAddr yields,
// derives the peer pots for that address set with NewPeerPotMap, and hands the
// pot stored under k's hex-encoded base address to GetHealthInfo.
func getHealth(k *Kademlia) *Health {
	self := common.Bytes2Hex(k.BaseAddr())

	// the base address always comes first, then the known addresses in iteration order
	known := [][]byte{k.BaseAddr()}
	collect := func(a *BzzAddr, _ int) bool {
		known = append(known, a.Address())
		return true
	}
	k.EachAddr(nil, 255, collect)

	pots := NewPeerPotMap(k.NeighbourhoodSize, known)
	return k.GetHealthInfo(pots[self])
}

// isRobust asserts that the Robust flag of the kademlia's current health
// report (obtained through getHealth) equals expectIsRobust, and aborts the
// test with a dump of the kademlia otherwise.
//
// Robustness is the additional health condition:
// - IF we know of peers in bins shallower than depth, we are connected to at least HealthBinSize of them
func isRobust(t *testing.T, k *testKademlia, expectIsRobust bool) {
	t.Helper()
	if robust := getHealth(k.Kademlia).Robust; robust != expectIsRobust {
		t.Fatalf("expected kademlia potency %v, is %v\n%v", expectIsRobust, robust, k.String())
	}
}

func (tk *testKademlia) checkSuggestPeer(expAddr string, expDepth int, expChanged bool) {
tk.t.Helper()
addr, depth, changed := tk.SuggestPeer()
Expand Down
4 changes: 2 additions & 2 deletions swarm/network/simulation/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
addr := common.Bytes2Hex(k.BaseAddr())
pp := ppmap[addr]
//call Healthy RPC
h := k.Healthy(pp)
h := k.GetHealthInfo(pp)
//print info
log.Debug(k.String())
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id)
if !h.ConnectNN {
if !h.IsHealthyStrict() {
ill[id] = k
}
}
Expand Down
12 changes: 6 additions & 6 deletions swarm/network/simulations/discovery/discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
}

healthy := &network.Health{}
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
return healthy.KnowNN && healthy.ConnectNN, nil
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v, is robust: %v, \n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Robust, healthy.Hive))
return healthy.IsHealthyStrict(), nil
}

// 64 nodes ~ 1min
Expand Down Expand Up @@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
addr := id.String()
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return fmt.Errorf("error getting node health: %s", err)
}

Expand Down Expand Up @@ -425,12 +425,12 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)

if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))

return healthy.KnowNN && healthy.ConnectNN, nil
return healthy.IsHealthyStrict(), nil
}

// 64 nodes ~ 1min
Expand Down