TUN-3869: Improve reliability of graceful shutdown.

- Don't rely on the edge to close the connection on graceful shutdown in h2mux; start the muxer shutdown from cloudflared instead.
- Don't retry failed connections after graceful shutdown has started.
- After the graceful shutdown channel is closed, stop waiting for the retry timer and don't try to restart the tunnel loop.
- Use a receive-only channel (`<-chan struct{}`) for graceful shutdown in functions that only consume the signal.
This commit is contained in:
Igor Postelnik
2021-02-04 18:07:49 -06:00
parent dbd90f270e
commit 0b16a473da
6 changed files with 95 additions and 83 deletions

View File

@@ -30,7 +30,7 @@ type h2muxConnection struct {
connIndex uint8
observer *Observer
gracefulShutdownC chan struct{}
gracefulShutdownC <-chan struct{}
stoppedGracefully bool
// newRPCClientFunc allows us to mock RPCs during testing
@@ -63,7 +63,7 @@ func NewH2muxConnection(
edgeConn net.Conn,
connIndex uint8,
observer *Observer,
gracefulShutdownC chan struct{},
gracefulShutdownC <-chan struct{},
) (*h2muxConnection, error, bool) {
h := &h2muxConnection{
config: config,
@@ -168,6 +168,7 @@ func (h *h2muxConnection) serveMuxer(ctx context.Context) error {
func (h *h2muxConnection) controlLoop(ctx context.Context, connectedFuse ConnectedFuse, isNamedTunnel bool) {
updateMetricsTickC := time.Tick(h.muxerConfig.MetricsUpdateFreq)
var shutdownCompleted <-chan struct{}
for {
select {
case <-h.gracefulShutdownC:
@@ -176,6 +177,10 @@ func (h *h2muxConnection) controlLoop(ctx context.Context, connectedFuse Connect
}
h.stoppedGracefully = true
h.gracefulShutdownC = nil
shutdownCompleted = h.muxer.Shutdown()
case <-shutdownCompleted:
return
case <-ctx.Done():
// UnregisterTunnel blocks until the RPC call returns
@@ -183,6 +188,7 @@ func (h *h2muxConnection) controlLoop(ctx context.Context, connectedFuse Connect
h.unregister(isNamedTunnel)
}
h.muxer.Shutdown()
// don't wait for shutdown to finish when context is closed, this is the hard termination path
return
case <-updateMetricsTickC:

View File

@@ -39,7 +39,7 @@ type http2Connection struct {
activeRequestsWG sync.WaitGroup
connectedFuse ConnectedFuse
gracefulShutdownC chan struct{}
gracefulShutdownC <-chan struct{}
stoppedGracefully bool
controlStreamErr error // result of running control stream handler
}
@@ -52,7 +52,7 @@ func NewHTTP2Connection(
observer *Observer,
connIndex uint8,
connectedFuse ConnectedFuse,
gracefulShutdownC chan struct{},
gracefulShutdownC <-chan struct{},
) *http2Connection {
return &http2Connection{
conn: conn,

View File

@@ -257,7 +257,8 @@ func TestGracefulShutdownHTTP2(t *testing.T) {
unregistered: make(chan struct{}),
}
http2Conn.newRPCClientFunc = rpcClientFactory.newMockRPCClient
http2Conn.gracefulShutdownC = make(chan struct{})
shutdownC := make(chan struct{})
http2Conn.gracefulShutdownC = shutdownC
ctx, cancel := context.WithCancel(context.Background())
var wg sync.WaitGroup
@@ -288,7 +289,7 @@ func TestGracefulShutdownHTTP2(t *testing.T) {
}
// signal graceful shutdown
close(http2Conn.gracefulShutdownC)
close(shutdownC)
select {
case <-rpcClientFactory.unregistered: