TUN-5368: Log connection issues with LogLevel that depends on tunnel state

Connections from cloudflared to Cloudflare edge are long lived and may
break over time. That is expected for many reasons (ranging from network
conditions to operations within Cloudflare edge). Hence, logging that as
Error feels too strong and leads to users being concerned that something
is failing when it is actually expected.

With this change, we wrap the logging of connection issues so that the
log level depends on the tunnel state:
 - if the tunnel has no active connections, we log as error
 - otherwise, we log as warning
This commit is contained in:
Nuno Diegues
2021-11-08 15:43:36 +00:00
parent 6bcc9a76e9
commit 1ee540a166
6 changed files with 148 additions and 60 deletions

View File

@@ -4,46 +4,32 @@ import (
"encoding/json"
"fmt"
"net/http"
"sync"
conn "github.com/cloudflare/cloudflared/connection"
"github.com/cloudflare/cloudflared/tunnelstate"
"github.com/rs/zerolog"
)
// ReadyServer serves HTTP 200 if the tunnel can serve traffic. Intended for k8s readiness checks.
//
// NOTE(review): this listing appears to interleave the pre- and post-change
// lines of a diff, so both the lock-guarded isConnected map and the tracker
// field are shown here — confirm which fields exist in the final struct.
type ReadyServer struct {
// Embedded lock: OnTunnelEvent takes it for writing and makeResponse for
// reading whenever isConnected is accessed.
sync.RWMutex
// isConnected maps a connection index to whether that connection is
// currently marked as connected.
isConnected map[int]bool
// log receives the error that OnTunnelEvent emits for unknown event types.
log *zerolog.Logger
// tracker is handed every tunnel event by OnTunnelEvent and is asked for
// the number of active connections by makeResponse.
tracker *tunnelstate.ConnTracker
}
// NewReadyServer initializes a ReadyServer with an empty connection map, the
// given logger, and a ConnTracker built from that same logger.
//
// NOTE(review): nothing in this constructor subscribes to events; in the
// visible code dis/connection events reach the server through OnTunnelEvent —
// confirm where the server is registered as an event sink.
func NewReadyServer(log *zerolog.Logger) *ReadyServer {
return &ReadyServer{
isConnected: make(map[int]bool, 0),
log: log,
tracker: tunnelstate.NewConnTracker(log),
}
}
// OnTunnelEvent updates the readiness state from a connection event.
//
// A Connected event marks the connection at c.Index as connected, while
// Disconnected, Reconnecting, RegisteringTunnel and Unregistering mark it as
// not connected. Any other event type is logged as an error. After the switch,
// the event is passed on to the tracker regardless of its type.
//
// NOTE(review): the per-index bookkeeping and the tracker delegation are both
// shown here, presumably because pre- and post-change diff lines are
// interleaved — confirm which of the two the final method keeps.
func (rs *ReadyServer) OnTunnelEvent(c conn.Event) {
switch c.EventType {
case conn.Connected:
// Write lock held only for the single map update.
rs.Lock()
rs.isConnected[int(c.Index)] = true
rs.Unlock()
case conn.Disconnected, conn.Reconnecting, conn.RegisteringTunnel, conn.Unregistering:
// Every one of these event types counts as "not ready" for this index.
rs.Lock()
rs.isConnected[int(c.Index)] = false
rs.Unlock()
default:
rs.log.Error().Msgf("Unknown connection event case %v", c)
}
rs.tracker.OnTunnelEvent(c)
}
// body carries a status and a ready-connection count, serialized under the
// JSON keys "status" and "readyConnections".
//
// NOTE(review): each field is listed twice (ReadyConnections once as int and
// once as uint), presumably because pre- and post-change diff lines are
// interleaved — confirm that only the uint variant remains.
type body struct {
Status int `json:"status"`
ReadyConnections int `json:"readyConnections"`
Status int `json:"status"`
ReadyConnections uint `json:"readyConnections"`
}
// ServeHTTP responds with HTTP 200 if the tunnel is connected to the edge.
@@ -63,15 +49,11 @@ func (rs *ReadyServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
// This is the bulk of the logic for ServeHTTP, broken into its own pure function
// to make unit testing easy.
//
// NOTE(review): two makeResponse headers follow (int and uint return for
// readyConnections), presumably the pre- and post-change versions of the diff
// interleaved — confirm which one is current before relying on this listing.
func (rs *ReadyServer) makeResponse() (statusCode, readyConnections int) {
// Map-based variant: start from "unavailable" and switch to OK as soon as
// any entry in isConnected is true, counting the connected entries.
statusCode = http.StatusServiceUnavailable
rs.RLock()
defer rs.RUnlock()
for _, connected := range rs.isConnected {
if connected {
statusCode = http.StatusOK
readyConnections++
}
// Tracker-based variant: returns http.StatusOK when the tracker reports at
// least one active connection and http.StatusServiceUnavailable otherwise;
// the count is returned in both cases.
func (rs *ReadyServer) makeResponse() (statusCode int, readyConnections uint) {
readyConnections = rs.tracker.CountActiveConns()
if readyConnections > 0 {
return http.StatusOK, readyConnections
} else {
return http.StatusServiceUnavailable, readyConnections
}
// NOTE(review): this return follows an if/else whose branches both return;
// presumably it belongs to the map-based variant above — confirm.
return statusCode, readyConnections
}

View File

@@ -7,6 +7,8 @@ import (
"github.com/rs/zerolog"
"github.com/stretchr/testify/assert"
"github.com/cloudflare/cloudflared/tunnelstate"
"github.com/cloudflare/cloudflared/connection"
)
@@ -18,7 +20,7 @@ func TestReadyServer_makeResponse(t *testing.T) {
name string
fields fields
wantOK bool
wantReadyConnections int
wantReadyConnections uint
}{
{
name: "One connection online => HTTP 200",
@@ -49,7 +51,7 @@ func TestReadyServer_makeResponse(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rs := &ReadyServer{
isConnected: tt.fields.isConnected,
tracker: tunnelstate.MockedConnTracker(tt.fields.isConnected),
}
gotStatusCode, gotReadyConnections := rs.makeResponse()
if tt.wantOK && gotStatusCode != http.StatusOK {