mirror of
https://github.com/cloudflare/cloudflared.git
synced 2025-07-29 04:19:57 +00:00
TUN-3869: Improve reliability of graceful shutdown.
- Don't rely on edge to close connection on graceful shutdown in h2mux, start muxer shutdown from cloudflared. - Don't retry failed connections after graceful shutdown has started. - After graceful shutdown channel is closed we stop waiting for retry timer and don't try to restart tunnel loop. - Use readonly channel for graceful shutdown in functions that only consume the signal
This commit is contained in:
@@ -58,16 +58,18 @@ type Supervisor struct {
|
||||
useReconnectToken bool
|
||||
|
||||
reconnectCh chan ReconnectSignal
|
||||
gracefulShutdownC chan struct{}
|
||||
gracefulShutdownC <-chan struct{}
|
||||
}
|
||||
|
||||
var errEarlyShutdown = errors.New("shutdown started")
|
||||
|
||||
type tunnelError struct {
|
||||
index int
|
||||
addr *net.TCPAddr
|
||||
err error
|
||||
}
|
||||
|
||||
func NewSupervisor(config *TunnelConfig, reconnectCh chan ReconnectSignal, gracefulShutdownC chan struct{}) (*Supervisor, error) {
|
||||
func NewSupervisor(config *TunnelConfig, reconnectCh chan ReconnectSignal, gracefulShutdownC <-chan struct{}) (*Supervisor, error) {
|
||||
cloudflaredUUID, err := uuid.NewRandom()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate cloudflared instance ID: %w", err)
|
||||
@@ -108,6 +110,9 @@ func (s *Supervisor) Run(
|
||||
connectedSignal *signal.Signal,
|
||||
) error {
|
||||
if err := s.initialize(ctx, connectedSignal); err != nil {
|
||||
if err == errEarlyShutdown {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
var tunnelsWaiting []int
|
||||
@@ -130,6 +135,7 @@ func (s *Supervisor) Run(
|
||||
}
|
||||
}
|
||||
|
||||
shuttingDown := false
|
||||
for {
|
||||
select {
|
||||
// Context cancelled
|
||||
@@ -143,7 +149,7 @@ func (s *Supervisor) Run(
|
||||
// (note that this may also be caused by context cancellation)
|
||||
case tunnelError := <-s.tunnelErrors:
|
||||
tunnelsActive--
|
||||
if tunnelError.err != nil {
|
||||
if tunnelError.err != nil && !shuttingDown {
|
||||
s.log.Err(tunnelError.err).Int(connection.LogFieldConnIndex, tunnelError.index).Msg("Connection terminated")
|
||||
tunnelsWaiting = append(tunnelsWaiting, tunnelError.index)
|
||||
s.waitForNextTunnel(tunnelError.index)
|
||||
@@ -181,6 +187,8 @@ func (s *Supervisor) Run(
|
||||
// No more tunnels outstanding, clear backoff timer
|
||||
backoff.SetGracePeriod()
|
||||
}
|
||||
case <-s.gracefulShutdownC:
|
||||
shuttingDown = true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -203,6 +211,8 @@ func (s *Supervisor) initialize(
|
||||
return ctx.Err()
|
||||
case tunnelError := <-s.tunnelErrors:
|
||||
return tunnelError.err
|
||||
case <-s.gracefulShutdownC:
|
||||
return errEarlyShutdown
|
||||
case <-connectedSignal.Wait():
|
||||
}
|
||||
// At least one successful connection, so start the rest
|
||||
|
Reference in New Issue
Block a user