mirror of
https://github.com/cloudflare/cloudflared.git
synced 2025-07-27 17:19:58 +00:00
TUN-8621: Prevent QUIC connection from closing before grace period after unregistering
Whenever cloudflared receives a SIGTERM or SIGINT it goes into graceful shutdown mode, which unregisters the connection and closes the control stream. Unregistering makes it so we no longer receive any new requests and makes the edge close the connection, allowing in-flight requests to finish (within a 3 minute period). This was working fine for http2 connections, but the quic proxy was cancelling the context as soon as the control stream ended, forcing the process to stop immediately. This commit changes the behavior so that we wait the full grace period before cancelling the request.
This commit is contained in:

committed by
chungthuang

parent
05249c7b51
commit
e251a21810
@@ -6,6 +6,8 @@ import (
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/cloudflare/cloudflared/management"
|
||||
"github.com/cloudflare/cloudflared/tunnelrpc"
|
||||
tunnelpogs "github.com/cloudflare/cloudflared/tunnelrpc/pogs"
|
||||
@@ -116,27 +118,32 @@ func (c *controlStream) ServeControlStream(
|
||||
}
|
||||
}
|
||||
|
||||
c.waitForUnregister(ctx, registrationClient)
|
||||
return nil
|
||||
return c.waitForUnregister(ctx, registrationClient)
|
||||
}
|
||||
|
||||
func (c *controlStream) waitForUnregister(ctx context.Context, registrationClient tunnelrpc.RegistrationClient) {
|
||||
func (c *controlStream) waitForUnregister(ctx context.Context, registrationClient tunnelrpc.RegistrationClient) error {
|
||||
// wait for connection termination or start of graceful shutdown
|
||||
defer registrationClient.Close()
|
||||
var shutdownError error
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
shutdownError = ctx.Err()
|
||||
break
|
||||
case <-c.gracefulShutdownC:
|
||||
c.stoppedGracefully = true
|
||||
}
|
||||
|
||||
c.observer.sendUnregisteringEvent(c.connIndex)
|
||||
registrationClient.GracefulShutdown(ctx, c.gracePeriod)
|
||||
err := registrationClient.GracefulShutdown(ctx, c.gracePeriod)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "Error shutting down control stream")
|
||||
}
|
||||
c.observer.log.Info().
|
||||
Int(management.EventTypeKey, int(management.Cloudflared)).
|
||||
Uint8(LogFieldConnIndex, c.connIndex).
|
||||
IPAddr(LogFieldIPAddress, c.edgeAddress).
|
||||
Msg("Unregistered tunnel connection")
|
||||
return shutdownError
|
||||
}
|
||||
|
||||
func (c *controlStream) IsStopped() bool {
|
||||
|
@@ -192,8 +192,9 @@ func (mc mockNamedTunnelRPCClient) RegisterConnection(
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (mc mockNamedTunnelRPCClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) {
|
||||
func (mc mockNamedTunnelRPCClient) GracefulShutdown(ctx context.Context, gracePeriod time.Duration) error {
|
||||
close(mc.unregistered)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mockNamedTunnelRPCClient) Close() {}
|
||||
|
@@ -69,6 +69,7 @@ type QUICConnection struct {
|
||||
|
||||
rpcTimeout time.Duration
|
||||
streamWriteTimeout time.Duration
|
||||
gracePeriod time.Duration
|
||||
}
|
||||
|
||||
// NewQUICConnection returns a new instance of QUICConnection.
|
||||
@@ -86,6 +87,7 @@ func NewQUICConnection(
|
||||
packetRouterConfig *ingress.GlobalRouterConfig,
|
||||
rpcTimeout time.Duration,
|
||||
streamWriteTimeout time.Duration,
|
||||
gracePeriod time.Duration,
|
||||
) (*QUICConnection, error) {
|
||||
udpConn, err := createUDPConnForConnIndex(connIndex, localAddr, logger)
|
||||
if err != nil {
|
||||
@@ -122,6 +124,7 @@ func NewQUICConnection(
|
||||
connIndex: connIndex,
|
||||
rpcTimeout: rpcTimeout,
|
||||
streamWriteTimeout: streamWriteTimeout,
|
||||
gracePeriod: gracePeriod,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -144,8 +147,17 @@ func (q *QUICConnection) Serve(ctx context.Context) error {
|
||||
// In the future, if cloudflared can autonomously push traffic to the edge, we have to make sure the control
|
||||
// stream is already fully registered before the other goroutines can proceed.
|
||||
errGroup.Go(func() error {
|
||||
defer cancel()
|
||||
return q.serveControlStream(ctx, controlStream)
|
||||
// err is equal to nil if we exit due to unregistration. If that happens we want to wait the full
|
||||
// amount of the grace period, allowing requests to finish before we cancel the context, which will
|
||||
// make cloudflared exit.
|
||||
if err := q.serveControlStream(ctx, controlStream); err == nil {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
case <-time.Tick(q.gracePeriod):
|
||||
}
|
||||
}
|
||||
cancel()
|
||||
return err
|
||||
})
|
||||
errGroup.Go(func() error {
|
||||
defer cancel()
|
||||
|
@@ -736,6 +736,7 @@ func testQUICConnection(udpListenerAddr net.Addr, t *testing.T, index uint8) *QU
|
||||
nil,
|
||||
15*time.Second,
|
||||
0*time.Second,
|
||||
0*time.Second,
|
||||
)
|
||||
require.NoError(t, err)
|
||||
return qc
|
||||
|
Reference in New Issue
Block a user