TUN-2637: Manage edge IPs in a region-aware manner

This commit is contained in:
Nick Vollmar
2019-12-13 17:05:21 -06:00
parent 87102a2646
commit 7e31b77646
10 changed files with 1011 additions and 206 deletions

View File

@@ -2,6 +2,7 @@ package connection
import (
"context"
"net"
"time"
"github.com/google/uuid"
@@ -19,9 +20,10 @@ const (
type Connection struct {
// id is the random UUID assigned to this connection by newConnection.
id uuid.UUID
// muxer is the h2mux multiplexer handed to newConnection.
muxer *h2mux.Muxer
// addr is the edge address handed to newConnection; EdgeManager.serveConn
// gives it back to the service discoverer once the connection has closed.
addr *net.TCPAddr
}
func newConnection(muxer *h2mux.Muxer) (*Connection, error) {
func newConnection(muxer *h2mux.Muxer, addr *net.TCPAddr) (*Connection, error) {
id, err := uuid.NewRandom()
if err != nil {
return nil, err
@@ -29,6 +31,7 @@ func newConnection(muxer *h2mux.Muxer) (*Connection, error) {
return &Connection{
id: id,
muxer: muxer,
addr: addr,
}, nil
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"crypto/tls"
"fmt"
"math/rand"
"net"
"sync"
"time"
@@ -26,8 +27,28 @@ const (
// SRV record resolution TTL
resolveEdgeAddrTTL = 1 * time.Hour
subsystemEdgeAddrResolver = "edgeAddrResolver"
)
// Redeclare network functions so they can be overridden in tests.
// Tests assign mock implementations to these variables; the discovery code
// calls them instead of calling net.LookupSRV / net.LookupIP directly.
var (
// netLookupSRV performs the primary SRV lookup in EdgeDiscovery.
netLookupSRV = net.LookupSRV
// netLookupIP resolves an SRV target to IP addresses in resolveSRVToTCP.
netLookupIP = net.LookupIP
)
// fallbackLookupSRV is the SRV lookup that EdgeDiscovery falls back to when the
// call through netLookupSRV fails: it asks Cloudflare directly over DNS-over-TLS (DoT).
//
// Note: Instead of DoT, we could also have used DoH. Either of these:
// - directly via the JSON API (https://1.1.1.1/dns-query?ct=application/dns-json&name=_origintunneld._tcp.argotunnel.com&type=srv)
// - indirectly via `tunneldns.NewUpstreamHTTPS()`
// But both of these cases miss out on a key feature from the stdlib:
// "The returned records are sorted by priority and randomized by weight within a priority."
// (https://golang.org/pkg/net/#Resolver.LookupSRV)
// Does this matter? I don't know. It may someday. Let's use DoT so we don't need to worry about it.
// See also: Go feature request for stdlib-supported DoH: https://github.com/golang/go/issues/27552
var fallbackLookupSRV = lookupSRVWithDOT
var friendlyDNSErrorLines = []string{
`Please try the following things to diagnose this issue:`,
` 1. ensure that argotunnel.com is returning "origintunneld" service records.`,
@@ -40,11 +61,26 @@ var friendlyDNSErrorLines = []string{
// EdgeServiceDiscoverer is an interface for looking up Cloudflare's edge network addresses
type EdgeServiceDiscoverer interface {
// Addr returns an address to connect to cloudflare's edge network
Addr() *net.TCPAddr
// AvailableAddrs returns the number of unique addresses
AvailableAddrs() uint8
// Refresh rediscover Cloudflare's edge network addresses
// Addr returns an unused address to connect to cloudflare's edge network.
// Before this method returns, the address will be removed from the pool of available addresses,
// so the caller can assume they have exclusive access to the address for tunneling purposes.
// The caller should remember to put it back via ReplaceAddr or MarkAddrBad.
Addr() (*net.TCPAddr, error)
// AnyAddr returns an address to connect to cloudflare's edge network.
// It may or may not be in active use for a tunnel.
// The caller should NOT return it via ReplaceAddr or MarkAddrBad!
AnyAddr() (*net.TCPAddr, error)
// ReplaceAddr is called when the address is no longer needed, e.g. due to a scaling-down of numHAConnections.
// It returns the address to the pool of available addresses.
ReplaceAddr(addr *net.TCPAddr)
// MarkAddrBad is called when there was a connectivity error for the address.
// It marks the address as unused but doesn't return it to the pool of available addresses.
MarkAddrBad(addr *net.TCPAddr)
// AvailableAddrs returns the number of addresses available for use
// (less those that have been marked bad).
AvailableAddrs() int
// Refresh rediscovers Cloudflare's edge network addresses.
// It resets the state of "bad" addresses but not those in active use.
Refresh() error
}
@@ -52,16 +88,31 @@ type EdgeServiceDiscoverer interface {
// It implements EdgeServiceDiscoverer interface
type EdgeAddrResolver struct {
sync.Mutex
// Addrs to connect to cloudflare's edge network
// HA regions
regions []*region
// Logger for noteworthy events
logger *logrus.Entry
}
type region struct {
// Addresses that we expect will be in active use
addrs []*net.TCPAddr
// index of the next element to use in addrs
nextAddrIndex int
logger *logrus.Entry
// Addresses that are in active use.
// This is actually a set of net.TCPAddr's, but we can't make a map like
// map[net.TCPAddr]bool
// since net.TCPAddr contains a field of type net.IP and therefore it cannot be used as a map key.
// So instead we use map[string]*net.TCPAddr, where the keys are obtained by net.TCPAddr.String().
// (We keep the "raw" *net.TCPAddr values for the convenience of AnyAddr(). If that method didn't
// exist, we wouldn't strictly need the values, and this could be a map[string]bool.)
inUse map[string]*net.TCPAddr
// Addresses that were discarded due to a network error.
// Not sure what we'll do with these, but it feels good to keep them around for now.
bad []*net.TCPAddr
}
func NewEdgeAddrResolver(logger *logrus.Logger) (EdgeServiceDiscoverer, error) {
r := &EdgeAddrResolver{
logger: logger.WithField("subsystem", " edgeAddrResolver"),
logger: logger.WithField("subsystem", subsystemEdgeAddrResolver),
}
if err := r.Refresh(); err != nil {
return nil, err
@@ -69,83 +120,153 @@ func NewEdgeAddrResolver(logger *logrus.Logger) (EdgeServiceDiscoverer, error) {
return r, nil
}
func (r *EdgeAddrResolver) Addr() *net.TCPAddr {
func (r *EdgeAddrResolver) Addr() (*net.TCPAddr, error) {
r.Lock()
defer r.Unlock()
addr := r.addrs[r.nextAddrIndex]
r.nextAddrIndex = (r.nextAddrIndex + 1) % len(r.addrs)
return addr
// compute the largest region based on len(addrs)
var largestRegion *region
{
if len(r.regions) == 0 {
return nil, errors.New("No HA regions")
}
largestRegion = r.regions[0]
for _, region := range r.regions[1:] {
if len(region.addrs) > len(largestRegion.addrs) {
largestRegion = region
}
}
if len(largestRegion.addrs) == 0 {
return nil, errors.New("No IP address to claim")
}
}
var addr *net.TCPAddr
addr, largestRegion.addrs = popAddr(largestRegion.addrs)
largestRegion.inUse[addr.String()] = addr
return addr, nil
}
func (r *EdgeAddrResolver) AvailableAddrs() uint8 {
func (r *EdgeAddrResolver) AnyAddr() (*net.TCPAddr, error) {
r.Lock()
defer r.Unlock()
return uint8(len(r.addrs))
for _, region := range r.regions {
// return an unused addr
if len(region.addrs) > 0 {
return region.addrs[rand.Intn(len(region.addrs))], nil
}
// return an addr that's in use
for _, addr := range region.inUse {
return addr, nil
}
}
return nil, fmt.Errorf("No IP addresses")
}
// ReplaceAddr hands addr back to the pool of available addresses.
// The address is looked up (by its String() form) in each region's in-use set;
// the first region that holds it releases it and appends it to that region's
// free list. An address that no region has in use is ignored.
func (r *EdgeAddrResolver) ReplaceAddr(addr *net.TCPAddr) {
	key := addr.String()

	r.Lock()
	defer r.Unlock()

	for _, reg := range r.regions {
		if _, claimed := reg.inUse[key]; !claimed {
			continue
		}
		delete(reg.inUse, key)
		reg.addrs = append(reg.addrs, addr)
		return
	}
}
// MarkAddrBad records that addr had a connectivity problem.
// The first region that has addr in its in-use set drops it from that set and
// appends it to the region's bad list; it is not returned to the free list.
// An address that no region has in use is ignored.
func (r *EdgeAddrResolver) MarkAddrBad(addr *net.TCPAddr) {
	key := addr.String()

	r.Lock()
	defer r.Unlock()

	for _, reg := range r.regions {
		if _, claimed := reg.inUse[key]; !claimed {
			continue
		}
		delete(reg.inUse, key)
		reg.bad = append(reg.bad, addr)
		return
	}
}
// AvailableAddrs returns how many addresses are currently free to be claimed,
// i.e. the sum of the lengths of every region's free list. Addresses that are
// in use or marked bad live in other collections and are not counted.
func (r *EdgeAddrResolver) AvailableAddrs() int {
	r.Lock()
	defer r.Unlock()

	var total int
	for i := range r.regions {
		total += len(r.regions[i].addrs)
	}
	return total
}
func (r *EdgeAddrResolver) Refresh() error {
newAddrs, err := EdgeDiscovery(r.logger)
addrLists, err := EdgeDiscovery(r.logger)
if err != nil {
return err
}
r.Lock()
defer r.Unlock()
r.addrs = newAddrs
r.nextAddrIndex = 0
inUse := allInUse(r.regions)
r.regions = makeHARegions(addrLists, inUse)
return nil
}
// HA service discovery lookup
func EdgeDiscovery(logger *logrus.Entry) ([]*net.TCPAddr, error) {
_, addrs, err := net.LookupSRV(srvService, srvProto, srvName)
// EdgeDiscovery implements HA service discovery lookup.
func EdgeDiscovery(logger *logrus.Entry) ([][]*net.TCPAddr, error) {
_, addrs, err := netLookupSRV(srvService, srvProto, srvName)
if err != nil {
// Try to fall back to DoT from Cloudflare directly.
//
// Note: Instead of DoT, we could also have used DoH. Either of these:
// - directly via the JSON API (https://1.1.1.1/dns-query?ct=application/dns-json&name=_origintunneld._tcp.argotunnel.com&type=srv)
// - indirectly via `tunneldns.NewUpstreamHTTPS()`
// But both of these cases miss out on a key feature from the stdlib:
// "The returned records are sorted by priority and randomized by weight within a priority."
// (https://golang.org/pkg/net/#Resolver.LookupSRV)
// Does this matter? I don't know. It may someday. Let's use DoT so we don't need to worry about it.
// See also: Go feature request for stdlib-supported DoH: https://github.com/golang/go/issues/27552
r := fallbackResolver(dotServerName, dotServerAddr)
ctx, cancel := context.WithTimeout(context.Background(), dotTimeout)
defer cancel()
_, fallbackAddrs, fallbackErr := r.LookupSRV(ctx, srvService, srvProto, srvName)
_, fallbackAddrs, fallbackErr := fallbackLookupSRV(srvService, srvProto, srvName)
if fallbackErr != nil || len(fallbackAddrs) == 0 {
// use the original DNS error `err` in messages, not `fallbackErr`
logger.Errorln("Error looking up Cloudflare edge IPs: the DNS query failed:", err)
for _, s := range friendlyDNSErrorLines {
logger.Errorln(s)
}
return nil, errors.Wrap(err, "Could not lookup srv records on _origintunneld._tcp.argotunnel.com")
return nil, errors.Wrapf(err, "Could not lookup srv records on _%v._%v.%v", srvService, srvProto, srvName)
}
// Accept the fallback results and keep going
addrs = fallbackAddrs
}
var resolvedIPsPerCNAME [][]*net.TCPAddr
var lookupErr error
for _, addr := range addrs {
ips, err := resolveSRVToTCP(addr)
if err != nil || len(ips) == 0 {
// don't return early, we might be able to resolve other addresses
lookupErr = err
continue
if err != nil {
return nil, err
}
resolvedIPsPerCNAME = append(resolvedIPsPerCNAME, ips)
}
ips := flattenServiceIPs(resolvedIPsPerCNAME)
if lookupErr == nil && len(ips) == 0 {
return nil, fmt.Errorf("Unknown service discovery error")
return resolvedIPsPerCNAME, nil
}
func lookupSRVWithDOT(service, proto, name string) (cname string, addrs []*net.SRV, err error) {
// Inspiration: https://github.com/artyom/dot/blob/master/dot.go
r := &net.Resolver{
PreferGo: true,
Dial: func(ctx context.Context, _ string, _ string) (net.Conn, error) {
var dialer net.Dialer
conn, err := dialer.DialContext(ctx, "tcp", dotServerAddr)
if err != nil {
return nil, err
}
tlsConfig := &tls.Config{ServerName: dotServerName}
return tls.Client(conn, tlsConfig), nil
},
}
return ips, lookupErr
ctx, cancel := context.WithTimeout(context.Background(), dotTimeout)
defer cancel()
return r.LookupSRV(ctx, srvService, srvProto, srvName)
}
func resolveSRVToTCP(srv *net.SRV) ([]*net.TCPAddr, error) {
ips, err := net.LookupIP(srv.Target)
ips, err := netLookupIP(srv.Target)
if err != nil {
return nil, err
return nil, errors.Wrapf(err, "Couldn't resolve SRV record %v", srv)
}
if len(ips) == 0 {
return nil, fmt.Errorf("SRV record %v had no IPs", srv)
}
addrs := make([]*net.TCPAddr, len(ips))
for i, ip := range ips {
@@ -154,43 +275,6 @@ func resolveSRVToTCP(srv *net.SRV) ([]*net.TCPAddr, error) {
return addrs, nil
}
// flattenServiceIPs transposes and flattens the input slices such that the
// first element of the n inner slices are the first n elements of the result,
// followed by the second element of every inner slice that has one, and so on.
// Inner slices may have different lengths; empty ones are skipped.
//
// The input is only read. (An in-place filter over ipsByService would
// overwrite the caller's outer slice as a side effect.)
// It returns nil when there are no addresses at all.
func flattenServiceIPs(ipsByService [][]*net.TCPAddr) []*net.TCPAddr {
	total, longest := 0, 0
	for _, ips := range ipsByService {
		total += len(ips)
		if len(ips) > longest {
			longest = len(ips)
		}
	}
	if total == 0 {
		return nil
	}

	result := make([]*net.TCPAddr, 0, total)
	// Walk "column by column": position i of every service, in service order.
	for i := 0; i < longest; i++ {
		for _, ips := range ipsByService {
			if i < len(ips) {
				result = append(result, ips[i])
			}
		}
	}
	return result
}
// fallbackResolver builds a pure-Go DNS resolver whose every connection is a
// TLS client connection to serverAddress, verified against serverName.
// The network and address the resolver would normally dial are ignored.
// Inspiration: https://github.com/artyom/dot/blob/master/dot.go
func fallbackResolver(serverName, serverAddress string) *net.Resolver {
	dialTLS := func(ctx context.Context, _, _ string) (net.Conn, error) {
		rawConn, err := (&net.Dialer{}).DialContext(ctx, "tcp", serverAddress)
		if err != nil {
			return nil, err
		}
		return tls.Client(rawConn, &tls.Config{ServerName: serverName}), nil
	}
	return &net.Resolver{PreferGo: true, Dial: dialTLS}
}
// EdgeHostnameResolver discovers the addresses of Cloudflare's edge network via a list of server hostnames.
// It implements EdgeServiceDiscoverer interface, and is used mainly for testing connectivity.
type EdgeHostnameResolver struct {
@@ -199,13 +283,20 @@ type EdgeHostnameResolver struct {
hostnames []string
// Addrs to connect to cloudflare's edge network
addrs []*net.TCPAddr
// index of the next element to use in addrs
nextAddrIndex int
// Addresses that are in active use.
// This is actually a set of net.TCPAddr's. We have to encode the keys
// with .String(), since net.TCPAddr contains a field of type net.IP and
// therefore it cannot be used as a map key
inUse map[string]*net.TCPAddr
// Addresses that were discarded due to a network error.
// Not sure what we'll do with these, but it feels good to keep them around for now.
bad []*net.TCPAddr
}
func NewEdgeHostnameResolver(edgeHostnames []string) (EdgeServiceDiscoverer, error) {
r := &EdgeHostnameResolver{
hostnames: edgeHostnames,
inUse: map[string]*net.TCPAddr{},
}
if err := r.Refresh(); err != nil {
return nil, err
@@ -213,18 +304,49 @@ func NewEdgeHostnameResolver(edgeHostnames []string) (EdgeServiceDiscoverer, err
return r, nil
}
func (r *EdgeHostnameResolver) Addr() *net.TCPAddr {
func (r *EdgeHostnameResolver) Addr() (*net.TCPAddr, error) {
r.Lock()
defer r.Unlock()
addr := r.addrs[r.nextAddrIndex]
r.nextAddrIndex = (r.nextAddrIndex + 1) % len(r.addrs)
return addr
if len(r.addrs) == 0 {
return nil, errors.New("No IP address to claim")
}
var addr *net.TCPAddr
addr, r.addrs = popAddr(r.addrs)
r.inUse[addr.String()] = addr
return addr, nil
}
func (r *EdgeHostnameResolver) AvailableAddrs() uint8 {
func (r *EdgeHostnameResolver) AnyAddr() (*net.TCPAddr, error) {
r.Lock()
defer r.Unlock()
return uint8(len(r.addrs))
// return an unused addr
if len(r.addrs) > 0 {
return r.addrs[rand.Intn(len(r.addrs))], nil
}
// return an addr that's in use
for _, addr := range r.inUse {
return addr, nil
}
return nil, errors.New("No IP addresses")
}
// ReplaceAddr hands addr back to the pool of available addresses.
// Only an address that is currently recorded as in use is returned to the
// pool, matching EdgeAddrResolver.ReplaceAddr. Releasing the same address
// twice, or releasing an address this resolver never handed out, is a no-op
// and does not put a duplicate or foreign entry in the pool.
func (r *EdgeHostnameResolver) ReplaceAddr(addr *net.TCPAddr) {
	r.Lock()
	defer r.Unlock()

	key := addr.String()
	if _, claimed := r.inUse[key]; !claimed {
		return
	}
	delete(r.inUse, key)
	r.addrs = append(r.addrs, addr)
}
// MarkAddrBad records that addr had a connectivity problem: the address is
// dropped from the in-use set and appended to the bad list instead of being
// returned to the pool.
// Only an address that is currently recorded as in use is affected, matching
// EdgeAddrResolver.MarkAddrBad; anything else is ignored so the bad list does
// not collect duplicates or addresses this resolver never handed out.
func (r *EdgeHostnameResolver) MarkAddrBad(addr *net.TCPAddr) {
	r.Lock()
	defer r.Unlock()

	key := addr.String()
	if _, claimed := r.inUse[key]; !claimed {
		return
	}
	delete(r.inUse, key)
	r.bad = append(r.bad, addr)
}
// AvailableAddrs returns the number of addresses currently in the free pool.
// Addresses that are in use or marked bad are kept elsewhere and not counted.
func (r *EdgeHostnameResolver) AvailableAddrs() int {
	r.Lock()
	free := len(r.addrs)
	r.Unlock()
	return free
}
func (r *EdgeHostnameResolver) Refresh() error {
@@ -234,8 +356,14 @@ func (r *EdgeHostnameResolver) Refresh() error {
}
r.Lock()
defer r.Unlock()
r.addrs = newAddrs
r.nextAddrIndex = 0
var notInUse []*net.TCPAddr
for _, newAddr := range newAddrs {
if _, ok := r.inUse[newAddr.String()]; !ok {
notInUse = append(notInUse, newAddr)
}
}
r.addrs = notInUse
r.bad = nil
return nil
}
@@ -252,3 +380,41 @@ func ResolveAddrs(addrs []string) ([]*net.TCPAddr, error) {
}
return tcpAddrs, nil
}
// allInUse computes the total set of IP addresses in use, merged across all
// regions and keyed the same way as each region's inUse map. This is useful if
// the regions are returned in a different order, or if an IP address is
// assigned to a different region for some reason.
// The result is always a non-nil map, even when regions is empty.
func allInUse(regions []*region) map[string]*net.TCPAddr {
	merged := map[string]*net.TCPAddr{}
	for i := range regions {
		for key, addr := range regions[i].inUse {
			merged[key] = addr
		}
	}
	return merged
}
// makeHARegions builds one region per entry of addrLists, preserving order.
// Each address goes into its region's inUse map when its String() form is a
// key of inUse, and onto the region's free list otherwise.
// The result is nil when addrLists is empty.
func makeHARegions(addrLists [][]*net.TCPAddr, inUse map[string]*net.TCPAddr) (regions []*region) {
	for _, addrList := range addrLists {
		reg := &region{inUse: make(map[string]*net.TCPAddr)}
		for _, addr := range addrList {
			key := addr.String()
			if _, claimed := inUse[key]; !claimed {
				reg.addrs = append(reg.addrs, addr)
				continue
			}
			// No matter what region `addr` used to belong to, it's now a part
			// of this region, so it is tracked in this region's `inUse` map.
			reg.inUse[key] = addr
		}
		regions = append(regions, reg)
	}
	return regions
}
// popAddr removes the first address from addrs and returns it together with
// the remaining slice. The vacated slot in the backing array is set to nil so
// the popped address is not kept reachable through it.
// addrs must not be empty.
func popAddr(addrs []*net.TCPAddr) (*net.TCPAddr, []*net.TCPAddr) {
	head := addrs[0]
	addrs[0] = nil // prevent memory leak
	return head, addrs[1:]
}

View File

@@ -2,62 +2,316 @@ package connection
import (
"net"
"sync"
"testing"
"testing/quick"
"time"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
)
type mockEdgeServiceDiscoverer struct {
func TestEdgeDiscovery(t *testing.T) {
mockAddrs := newMockAddrs(19, 2, 5)
netLookupSRV = mockNetLookupSRV(mockAddrs)
netLookupIP = mockNetLookupIP(mockAddrs)
expectedAddrSet := map[string]bool{}
for _, addrs := range mockAddrs.addrMap {
for _, addr := range addrs {
expectedAddrSet[addr.String()] = true
}
}
addrLists, err := EdgeDiscovery(logrus.New().WithFields(logrus.Fields{}))
assert.NoError(t, err)
actualAddrSet := map[string]bool{}
for _, addrs := range addrLists {
for _, addr := range addrs {
actualAddrSet[addr.String()] = true
}
}
assert.Equal(t, expectedAddrSet, actualAddrSet)
}
func (mr *mockEdgeServiceDiscoverer) Addr() *net.TCPAddr {
return &net.TCPAddr{
IP: net.ParseIP("127.0.0.1"),
Port: 63102,
func TestAllInUse(t *testing.T) {
for _, testCase := range []struct {
regions []*region
expected map[string]*net.TCPAddr
}{
{
regions: nil,
expected: map[string]*net.TCPAddr{},
},
{
regions: []*region{
&region{inUse: map[string]*net.TCPAddr{}},
&region{inUse: map[string]*net.TCPAddr{}},
},
expected: map[string]*net.TCPAddr{},
},
{
regions: []*region{
&region{inUse: map[string]*net.TCPAddr{":1": &net.TCPAddr{Port: 1}}},
&region{inUse: map[string]*net.TCPAddr{":4": &net.TCPAddr{Port: 4}}},
},
expected: map[string]*net.TCPAddr{":1": &net.TCPAddr{Port: 1}, ":4": &net.TCPAddr{Port: 4}},
},
} {
actual := allInUse(testCase.regions)
assert.Equal(t, testCase.expected, actual)
}
}
func (mr *mockEdgeServiceDiscoverer) AvailableAddrs() uint8 {
return 1
func TestMakeRegions(t *testing.T) {
for _, testCase := range []struct {
addrList [][]*net.TCPAddr
inUse map[string]*net.TCPAddr
expected []*region
}{
{
addrList: [][]*net.TCPAddr{},
expected: nil,
},
{
addrList: [][]*net.TCPAddr{
[]*net.TCPAddr{&net.TCPAddr{Port: 1}, &net.TCPAddr{Port: 2}},
},
expected: []*region{
&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 1}, &net.TCPAddr{Port: 2}}, inUse: map[string]*net.TCPAddr{}},
},
},
{
addrList: [][]*net.TCPAddr{
[]*net.TCPAddr{&net.TCPAddr{Port: 1}, &net.TCPAddr{Port: 2}},
[]*net.TCPAddr{&net.TCPAddr{Port: 3}, &net.TCPAddr{Port: 4}},
},
expected: []*region{
&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 1}, &net.TCPAddr{Port: 2}}, inUse: map[string]*net.TCPAddr{}},
&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 3}, &net.TCPAddr{Port: 4}}, inUse: map[string]*net.TCPAddr{}},
},
},
{
addrList: [][]*net.TCPAddr{
[]*net.TCPAddr{&net.TCPAddr{Port: 1}, &net.TCPAddr{Port: 2}},
[]*net.TCPAddr{&net.TCPAddr{Port: 3}, &net.TCPAddr{Port: 4}},
},
inUse: map[string]*net.TCPAddr{
":1": &net.TCPAddr{Port: 1},
":4": &net.TCPAddr{Port: 4},
},
expected: []*region{
&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 2}}, inUse: map[string]*net.TCPAddr{":1": &net.TCPAddr{Port: 1}}},
&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 3}}, inUse: map[string]*net.TCPAddr{":4": &net.TCPAddr{Port: 4}}},
},
},
} {
actual := makeHARegions(testCase.addrList, testCase.inUse)
assert.Equal(t, testCase.expected, actual)
}
}
func (mr *mockEdgeServiceDiscoverer) Refresh() error {
return nil
// assertIsBalanced checks that no region has significantly fewer free
// addresses than the region with the most: every free list must be either as
// long as the longest one or exactly one shorter.
// It reports a test failure and returns false for the first region that
// violates this, and returns true otherwise (including for no regions).
func assertIsBalanced(t *testing.T, regions []*region) bool {
	// Compute max(len(region.addrs) for region in regions).
	longestAddrs := 0
	for _, region := range regions {
		if l := len(region.addrs); l > longestAddrs {
			longestAddrs = l
		}
	}

	for _, region := range regions {
		n := len(region.addrs)
		if n == longestAddrs || n == longestAddrs-1 {
			continue
		}
		// assert.Fail takes a fixed failure message followed by an optional
		// format string and its arguments; the format string must not be
		// passed as the failure message itself or it is never expanded.
		return assert.Fail(t, "regions are not balanced",
			"found a region with %v free addrs, while the longest addrs list is %v",
			n, longestAddrs)
	}
	return true
}
func TestFlattenServiceIPs(t *testing.T) {
result := flattenServiceIPs([][]*net.TCPAddr{
[]*net.TCPAddr{
&net.TCPAddr{Port: 1},
&net.TCPAddr{Port: 2},
&net.TCPAddr{Port: 3},
&net.TCPAddr{Port: 4},
},
[]*net.TCPAddr{
&net.TCPAddr{Port: 10},
&net.TCPAddr{Port: 12},
&net.TCPAddr{Port: 13},
},
[]*net.TCPAddr{
&net.TCPAddr{Port: 21},
&net.TCPAddr{Port: 22},
&net.TCPAddr{Port: 23},
&net.TCPAddr{Port: 24},
&net.TCPAddr{Port: 25},
},
})
assert.EqualValues(t, []*net.TCPAddr{
&net.TCPAddr{Port: 1},
&net.TCPAddr{Port: 10},
&net.TCPAddr{Port: 21},
&net.TCPAddr{Port: 2},
&net.TCPAddr{Port: 12},
&net.TCPAddr{Port: 22},
&net.TCPAddr{Port: 3},
&net.TCPAddr{Port: 13},
&net.TCPAddr{Port: 23},
&net.TCPAddr{Port: 4},
&net.TCPAddr{Port: 24},
&net.TCPAddr{Port: 25},
}, result)
// Various end-to-end tests, run with quickcheck (i.e. the testing/quick package).
// Each property receives a generated mockAddrs, installs it behind the
// overridable DNS lookup functions, and exercises a freshly built resolver.
func TestEdgeAddrResolver(t *testing.T) {
	// Property: when many goroutines concurrently claim and release
	// addresses, every address is available again at the end.
	concurrentReplacement := func(mockAddrs mockAddrs) bool {
		netLookupSRV = mockNetLookupSRV(mockAddrs)
		netLookupIP = mockNetLookupIP(mockAddrs)

		resolver, err := NewEdgeAddrResolver(logrus.New())
		if !assert.NoError(t, err) {
			return false
		}
		assert.Equal(t, mockAddrs.numAddrs, resolver.AvailableAddrs(),
			"every address should be initially available")

		// Create several goroutines to simulate HA connections that acquire
		// and replace IP addresses.
		var wg sync.WaitGroup
		wg.Add(mockAddrs.numAddrs)
		for i := 0; i < mockAddrs.numAddrs; i++ {
			go func() {
				defer wg.Done()
				const reconnectionCount = 50
				for i := 0; i < reconnectionCount; i++ {
					if resolver.AvailableAddrs() == 0 {
						// Keep the error local to this goroutine: assigning to
						// the enclosing function's `err` from every worker
						// would be a data race.
						refreshErr := resolver.Refresh()
						assert.NoError(t, refreshErr)
					}
					addr, err := resolver.Addr()
					if !assert.NoError(t, err) {
						return
					}
					time.Sleep(0) // allow some other goroutine to run
					resolver.ReplaceAddr(addr)
					time.Sleep(0) // allow some other goroutine to run
				}
			}()
		}
		wg.Wait()

		assert.Equal(t, mockAddrs.numAddrs, resolver.AvailableAddrs(),
			"every address should be available after replacement")
		return !t.Failed()
	}

	// Property: an address marked bad stays unavailable until Refresh(),
	// after which it (and only it) can be claimed again.
	badAddrWithRefresh := func(mockAddrs mockAddrs) bool {
		netLookupSRV = mockNetLookupSRV(mockAddrs)
		netLookupIP = mockNetLookupIP(mockAddrs)

		resolver, err := NewEdgeAddrResolver(logrus.New())
		if !assert.NoError(t, err) {
			return false
		}
		assert.Equal(t, mockAddrs.numAddrs, resolver.AvailableAddrs(),
			"every address should be initially available")

		var addrs []*net.TCPAddr
		for i := 0; i < mockAddrs.numAddrs; i++ {
			assert.Equal(t, mockAddrs.numAddrs-i, resolver.AvailableAddrs())
			addr, err := resolver.Addr()
			assert.NoError(t, err)
			addrs = append(addrs, addr)
		}
		assert.Equal(t, 0, resolver.AvailableAddrs(), "all addresses should have been taken")
		_, err = resolver.Addr()
		assert.Error(t, err)

		anyAddr, err := resolver.AnyAddr()
		assert.NoError(t, err, "should still be okay to call AnyAddr")
		resolver.MarkAddrBad(anyAddr)
		assert.Equal(t, 0, resolver.AvailableAddrs(), "all addresses should still be used")
		_, err = resolver.Addr()
		assert.Error(t, err, "all addresses should still be used")

		err = resolver.Refresh()
		assert.NoError(t, err, "Refresh() should have worked")
		assert.Equal(t, 1, resolver.AvailableAddrs(),
			"Refresh() should have reset the state of the 'bad' address")
		addr, err := resolver.Addr()
		assert.NoError(t, err)
		assert.Equal(t, anyAddr, addr)
		_, err = resolver.Addr()
		assert.Error(t, err, "all addresses should be used again")
		return !t.Failed()
	}

	assert.NoError(t, quick.Check(concurrentReplacement, nil))
	assert.NoError(t, quick.Check(badAddrWithRefresh, nil))
}
// "White-box" test: runs Addr() and checks internal state.
// Addr() must fail without regions, and otherwise claim the first free address
// of the first region with the longest free list, moving it into inUse.
func TestEdgeAddrResolver_Addr(t *testing.T) {
	_, err := (&EdgeAddrResolver{regions: nil}).Addr()
	assert.Error(t, err)

	port := func(p int) *net.TCPAddr { return &net.TCPAddr{Port: p} }
	// fixture returns a fresh, independent copy of the regions on every call.
	fixture := func() []*region {
		return []*region{
			{addrs: []*net.TCPAddr{port(1)}, inUse: map[string]*net.TCPAddr{":2": port(2), ":3": port(3)}},
			{addrs: []*net.TCPAddr{port(4), port(5)}, inUse: map[string]*net.TCPAddr{":6": port(6)}},
			{addrs: []*net.TCPAddr{port(7), port(8)}, inUse: map[string]*net.TCPAddr{":9": port(9)}},
		}
	}

	resolver := &EdgeAddrResolver{regions: fixture()}
	claimed, err := resolver.Addr()
	assert.NoError(t, err)
	assert.Equal(t, port(4), claimed)

	want := fixture()
	want[1].addrs = want[1].addrs[1:]
	want[1].inUse[":4"] = port(4)
	assert.Equal(t, want, resolver.regions)
}
// "White-box" test: runs AnyAddr() and checks internal state.
// AnyAddr() must fail without regions, prefer a free address when one exists,
// and fall back to an in-use address rather than returning nothing.
func TestEdgeAddrResolver_AnyAddr(t *testing.T) {
	port := func(p int) *net.TCPAddr { return &net.TCPAddr{Port: p} }

	_, err := (&EdgeAddrResolver{regions: nil}).AnyAddr()
	assert.Error(t, err)

	withFree := &EdgeAddrResolver{regions: []*region{
		{addrs: []*net.TCPAddr{port(1)}, inUse: map[string]*net.TCPAddr{":2": port(2)}},
	}}
	got, err := withFree.AnyAddr()
	assert.NoError(t, err)
	assert.Equal(t, port(1), got, "should have chosen the inactive address")

	onlyInUse := &EdgeAddrResolver{regions: []*region{
		{inUse: map[string]*net.TCPAddr{":1": port(1)}},
	}}
	got, err = onlyInUse.AnyAddr()
	assert.NoError(t, err)
	assert.Equal(t, port(1), got, "should have chosen an active address rather than nothing")
}
// "White-box" test: runs ReplaceAddr() and checks internal state.
// Releasing an in-use address must move it from its region's inUse map to the
// end of that region's free list and leave every other region untouched.
func TestEdgeAddrResolver_ReplaceAddr(t *testing.T) {
	port := func(p int) *net.TCPAddr { return &net.TCPAddr{Port: p} }

	// this shouldn't panic, I guess
	(&EdgeAddrResolver{regions: nil}).ReplaceAddr(port(1))

	// fixture returns a fresh, independent copy of the regions on every call.
	fixture := func() []*region {
		return []*region{
			{addrs: []*net.TCPAddr{port(1)}, inUse: map[string]*net.TCPAddr{":2": port(2), ":3": port(3)}},
			{addrs: []*net.TCPAddr{port(4), port(5)}, inUse: map[string]*net.TCPAddr{":6": port(6)}},
			{addrs: []*net.TCPAddr{port(7), port(8)}, inUse: map[string]*net.TCPAddr{":9": port(9)}},
		}
	}

	resolver := &EdgeAddrResolver{regions: fixture()}
	resolver.ReplaceAddr(port(6))

	want := fixture()
	delete(want[1].inUse, ":6")
	want[1].addrs = append(want[1].addrs, port(6))
	assert.Equal(t, want, resolver.regions)
}
// "White-box" test: runs MarkAddrBad() and checks internal state.
// Marking an in-use address bad must move it from its region's inUse map to
// that region's bad list and leave every other region untouched.
func TestEdgeAddrResolver_MarkAddrBad(t *testing.T) {
	e := &EdgeAddrResolver{regions: nil}
	// Exercise the method under test (not ReplaceAddr) on an empty resolver.
	e.MarkAddrBad(&net.TCPAddr{Port: 1}) // this shouldn't panic, I guess

	testRegions := func() []*region {
		return []*region{
			&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 1}}, inUse: map[string]*net.TCPAddr{":2": &net.TCPAddr{Port: 2}, ":3": &net.TCPAddr{Port: 3}}},
			&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 4}, &net.TCPAddr{Port: 5}}, inUse: map[string]*net.TCPAddr{":6": &net.TCPAddr{Port: 6}}},
			&region{addrs: []*net.TCPAddr{&net.TCPAddr{Port: 7}, &net.TCPAddr{Port: 8}}, inUse: map[string]*net.TCPAddr{":9": &net.TCPAddr{Port: 9}}},
		}
	}
	e = &EdgeAddrResolver{regions: testRegions()}
	e.MarkAddrBad(&net.TCPAddr{Port: 6})

	var expected []*region
	{
		expected = testRegions()
		delete(expected[1].inUse, ":6")
		expected[1].bad = append(expected[1].bad, &net.TCPAddr{Port: 6})
	}
	assert.Equal(t, expected, e.regions)
}

View File

@@ -127,7 +127,10 @@ func (em *EdgeManager) UpdateConfigurable(newConfigurable *EdgeManagerConfigurab
}
func (em *EdgeManager) newConnection(ctx context.Context) *tunnelpogs.ConnectError {
edgeTCPAddr := em.serviceDiscoverer.Addr()
edgeTCPAddr, err := em.serviceDiscoverer.Addr()
if err != nil {
return retryConnection(fmt.Sprintf("edge address discovery error: %v", err))
}
configurable := em.state.getConfigurable()
edgeConn, err := DialEdge(ctx, configurable.Timeout, em.tlsConfig, edgeTCPAddr)
if err != nil {
@@ -147,7 +150,7 @@ func (em *EdgeManager) newConnection(ctx context.Context) *tunnelpogs.ConnectErr
retryConnection(fmt.Sprintf("couldn't perform handshake with edge: %v", err))
}
h2muxConn, err := newConnection(muxer)
h2muxConn, err := newConnection(muxer, edgeTCPAddr)
if err != nil {
return retryConnection(fmt.Sprintf("couldn't create h2mux connection: %v", err))
}
@@ -186,6 +189,7 @@ func (em *EdgeManager) closeConnection(ctx context.Context) error {
return fmt.Errorf("no connection to close")
}
conn.Shutdown()
// teardown will be handled by EdgeManager.serveConn in another goroutine
return nil
}
@@ -193,6 +197,7 @@ func (em *EdgeManager) serveConn(ctx context.Context, conn *Connection) {
err := conn.Serve(ctx)
em.logger.WithError(err).Warn("Connection closed")
em.state.closeConnection(conn)
em.serviceDiscoverer.ReplaceAddr(conn.addr)
}
func (em *EdgeManager) noRetryMessage() string {
@@ -221,14 +226,14 @@ func newEdgeConnectionManagerState(configurable *EdgeManagerConfigurable, userCr
}
}
func (ems *edgeManagerState) shouldCreateConnection(availableEdgeAddrs uint8) bool {
func (ems *edgeManagerState) shouldCreateConnection(availableEdgeAddrs int) bool {
ems.RLock()
defer ems.RUnlock()
expectedHAConns := ems.configurable.NumHAConnections
expectedHAConns := int(ems.configurable.NumHAConnections)
if availableEdgeAddrs < expectedHAConns {
expectedHAConns = availableEdgeAddrs
}
return uint8(len(ems.conns)) < expectedHAConns
return len(ems.conns) < expectedHAConns
}
func (ems *edgeManagerState) shouldReduceConnection() bool {

View File

@@ -0,0 +1,118 @@
package connection
import (
"fmt"
"math"
"math/rand"
"net"
"reflect"
"testing/quick"
)
// mockAddrs is a synthetic set of edge addresses used by the tests to stand in
// for DNS: mockNetLookupSRV serves its keys and mockNetLookupIP serves the IPs
// of its values.
type mockAddrs struct {
// a set of synthetic SRV records, each mapped to the TCP addresses it resolves to
addrMap map[net.SRV][]*net.TCPAddr
// the total number of addresses, aggregated across addrMap.
// For the convenience of test code that would otherwise have to compute
// this by hand every time.
numAddrs int
}
// newMockAddrs builds numRegions synthetic SRV records named
// "test-region-<r>.example.com", each mapped to numAddrsPerRegion TCP
// addresses 10.0.<r>.<a> on the given port, and records the total count.
func newMockAddrs(port uint16, numRegions uint8, numAddrsPerRegion uint8) mockAddrs {
	result := mockAddrs{addrMap: make(map[net.SRV][]*net.TCPAddr)}
	for regionIdx := 0; regionIdx < int(numRegions); regionIdx++ {
		key := net.SRV{
			Target: fmt.Sprintf("test-region-%v.example.com", regionIdx),
			Port:   port,
		}
		var regionAddrs []*net.TCPAddr
		for hostIdx := 0; hostIdx < int(numAddrsPerRegion); hostIdx++ {
			regionAddrs = append(regionAddrs, &net.TCPAddr{
				IP:   net.ParseIP(fmt.Sprintf("10.0.%v.%v", regionIdx, hostIdx)),
				Port: int(port),
			})
		}
		result.addrMap[key] = regionAddrs
		result.numAddrs += len(regionAddrs)
	}
	return result
}
// Compile-time check that mockAddrs can be generated by testing/quick.
var _ quick.Generator = mockAddrs{}

// Generate implements quick.Generator. It draws a random port, between 1 and
// 10 regions and between 1 and 32 addresses per region (in that order) from
// rand, and wraps the resulting mockAddrs. The size hint is not used.
func (mockAddrs) Generate(rand *rand.Rand, size int) reflect.Value {
	randomPort := uint16(rand.Intn(math.MaxUint16))
	regionCount := uint8(1 + rand.Intn(10))
	addrsPerRegion := uint8(1 + rand.Intn(32))
	return reflect.ValueOf(newMockAddrs(randomPort, regionCount, addrsPerRegion))
}
// mockNetLookupSRV returns a function compatible with net.LookupSRV that
// ignores its arguments and always answers with the SRV records (the keys)
// of m, an empty cname and no error. The record list is built once, up front.
func mockNetLookupSRV(
	m mockAddrs,
) func(service, proto, name string) (cname string, addrs []*net.SRV, err error) {
	var records []*net.SRV
	for srv := range m.addrMap {
		// Store a pointer to a private copy of the key. Taking the address of
		// the loop variable itself would not do: it may be reused by later
		// iterations, so every stored pointer could end up at the same record.
		record := new(net.SRV)
		*record = srv
		records = append(records, record)
	}
	return func(string, string, string) (string, []*net.SRV, error) {
		return "", records, nil
	}
}
// mockNetLookupIP returns a function compatible with net.LookupIP. Given a
// host, it finds an SRV record in m whose Target equals that host and answers
// with the IPs of the TCP addresses stored for it; any other host is an error.
func mockNetLookupIP(
	m mockAddrs,
) func(host string) ([]net.IP, error) {
	return func(host string) ([]net.IP, error) {
		for srv, tcpAddrs := range m.addrMap {
			if srv.Target == host {
				ips := make([]net.IP, 0, len(tcpAddrs))
				for _, tcpAddr := range tcpAddrs {
					ips = append(ips, tcpAddr.IP)
				}
				return ips, nil
			}
		}
		return nil, fmt.Errorf("No IPs for %v", host)
	}
}
// mockEdgeServiceDiscoverer is a stateless stub with the EdgeServiceDiscoverer
// method set: it always hands out 127.0.0.1:63102, reports exactly one
// available address, and treats releasing, marking bad and refreshing as no-ops.
type mockEdgeServiceDiscoverer struct{}

// Addr returns a freshly allocated 127.0.0.1:63102 and never fails.
func (mr *mockEdgeServiceDiscoverer) Addr() (*net.TCPAddr, error) {
	return mr.AnyAddr()
}

// AnyAddr returns a freshly allocated 127.0.0.1:63102 and never fails.
func (mr *mockEdgeServiceDiscoverer) AnyAddr() (*net.TCPAddr, error) {
	loopback := &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 63102}
	return loopback, nil
}

// ReplaceAddr does nothing.
func (mr *mockEdgeServiceDiscoverer) ReplaceAddr(addr *net.TCPAddr) {}

// MarkAddrBad does nothing.
func (mr *mockEdgeServiceDiscoverer) MarkAddrBad(addr *net.TCPAddr) {}

// AvailableAddrs always reports a single available address.
func (mr *mockEdgeServiceDiscoverer) AvailableAddrs() int { return 1 }

// Refresh does nothing and never fails.
func (mr *mockEdgeServiceDiscoverer) Refresh() error { return nil }