TUN-5368: Log connection issues with LogLevel that depends on tunnel state

Connections from cloudflared to Cloudflare edge are long lived and may
break over time. That is expected for many reasons (ranging from network
conditions to operations within Cloudflare edge). Hence, logging that as
Error feels too strong and leads to users being concerned that something
is failing when it is actually expected.

With this change, we wrap logging about connection issues to be aware
of the tunnel state:
 - if the tunnel has no connections active, we log as error
 - otherwise we log as warning
This commit is contained in:
Nuno Diegues
2021-11-08 15:43:36 +00:00
parent 6bcc9a76e9
commit 1ee540a166
6 changed files with 148 additions and 60 deletions

View File

@@ -46,7 +46,7 @@ type Supervisor struct {
nextConnectedIndex int
nextConnectedSignal chan struct{}
log *zerolog.Logger
log *ConnAwareLogger
logTransport *zerolog.Logger
reconnectCredentialManager *reconnectCredentialManager
@@ -91,7 +91,7 @@ func NewSupervisor(config *TunnelConfig, reconnectCh chan ReconnectSignal, grace
edgeIPs: edgeIPs,
tunnelErrors: make(chan tunnelError),
tunnelsConnecting: map[int]chan struct{}{},
log: config.Log,
log: NewConnAwareLogger(config.Log, config.Observer),
logTransport: config.LogTransport,
reconnectCredentialManager: newReconnectCredentialManager(connection.MetricsNamespace, connection.TunnelSubsystem, config.HAConnections),
useReconnectToken: useReconnectToken,
@@ -123,7 +123,7 @@ func (s *Supervisor) Run(
if timer, err := s.reconnectCredentialManager.RefreshAuth(ctx, refreshAuthBackoff, s.authenticate); err == nil {
refreshAuthBackoffTimer = timer
} else {
s.log.Err(err).
s.log.Logger().Err(err).
Dur("refreshAuthRetryDuration", refreshAuthRetryDuration).
Msgf("supervisor: initial refreshAuth failed, retrying in %v", refreshAuthRetryDuration)
refreshAuthBackoffTimer = time.After(refreshAuthRetryDuration)
@@ -145,7 +145,7 @@ func (s *Supervisor) Run(
case tunnelError := <-s.tunnelErrors:
tunnelsActive--
if tunnelError.err != nil && !shuttingDown {
s.log.Err(tunnelError.err).Int(connection.LogFieldConnIndex, tunnelError.index).Msg("Connection terminated")
s.log.ConnAwareLogger().Err(tunnelError.err).Int(connection.LogFieldConnIndex, tunnelError.index).Msg("Connection terminated")
tunnelsWaiting = append(tunnelsWaiting, tunnelError.index)
s.waitForNextTunnel(tunnelError.index)
@@ -170,7 +170,7 @@ func (s *Supervisor) Run(
case <-refreshAuthBackoffTimer:
newTimer, err := s.reconnectCredentialManager.RefreshAuth(ctx, refreshAuthBackoff, s.authenticate)
if err != nil {
s.log.Err(err).Msg("supervisor: Authentication failed")
s.log.Logger().Err(err).Msg("supervisor: Authentication failed")
// Permanent failure. Leave the `select` without setting the
// channel to be non-null, so we'll never hit this case of the `select` again.
continue
@@ -195,7 +195,7 @@ func (s *Supervisor) initialize(
) error {
availableAddrs := s.edgeIPs.AvailableAddrs()
if s.config.HAConnections > availableAddrs {
s.log.Info().Msgf("You requested %d HA connections but I can give you at most %d.", s.config.HAConnections, availableAddrs)
s.log.Logger().Info().Msgf("You requested %d HA connections but I can give you at most %d.", s.config.HAConnections, availableAddrs)
s.config.HAConnections = availableAddrs
}
@@ -244,6 +244,7 @@ func (s *Supervisor) startFirstTunnel(
s.reconnectCredentialManager,
s.config,
addr,
s.log,
firstConnIndex,
connectedSignal,
s.cloudflaredUUID,
@@ -277,6 +278,7 @@ func (s *Supervisor) startFirstTunnel(
s.reconnectCredentialManager,
s.config,
addr,
s.log,
firstConnIndex,
connectedSignal,
s.cloudflaredUUID,
@@ -310,6 +312,7 @@ func (s *Supervisor) startTunnel(
s.reconnectCredentialManager,
s.config,
addr,
s.log,
uint8(index),
connectedSignal,
s.cloudflaredUUID,
@@ -373,7 +376,7 @@ func (s *Supervisor) authenticate(ctx context.Context, numPreviousAttempts int)
if err != nil {
return nil, err
}
rpcClient := connection.NewTunnelServerClient(ctx, stream, s.log)
rpcClient := connection.NewTunnelServerClient(ctx, stream, s.log.Logger())
defer rpcClient.Close()
const arbitraryConnectionID = uint8(0)