mirror of
https://github.com/cloudflare/cloudflared.git
synced 2025-07-27 19:49:57 +00:00
TUN-8709: Add session migration for datagram v3
When a registration response from cloudflared gets lost on its way back to the edge, the edge service will retry and send another registration request. Since cloudflared has already bound the local UDP socket for the provided request id, we want to re-send the registration response. There are three types of retries that the edge will send: 1. A retry from the same QUIC connection index; cloudflared will just respond with a registration response and reset the idle timer for the session. 2. A retry from a different QUIC connection index; cloudflared will need to migrate the current session connection to this new QUIC connection and reset the idle timer for the session. 3. A retry to a different cloudflared connector; cloudflared will eventually time the session out since no further packets will arrive for the session at the original connector. Closes TUN-8709
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"net"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
@@ -47,6 +48,9 @@ func newSessionIdleErr(timeout time.Duration) error {
|
||||
// Session is the interface returned by NewSession for driving and managing a single session.
type Session interface {
|
||||
// io.WriteCloser supplies the Write and Close methods of the session.
io.WriteCloser
|
||||
// ID returns the RequestID the session was created with.
ID() RequestID
|
||||
// ConnectionID returns the ID reported by the eyeball connection currently attached to the session.
ConnectionID() uint8
|
||||
// ResetIdleTimer restarts the session's idle timer.
ResetIdleTimer()
|
||||
// Migrate switches the session to the provided eyeball connection when its ID differs from the current one, and
// restarts the idle timeout.
Migrate(eyeball DatagramConn)
|
||||
// Serve starts the event loop for processing UDP packets
|
||||
Serve(ctx context.Context) error
|
||||
}
|
||||
@@ -55,31 +59,48 @@ type session struct {
|
||||
id RequestID
|
||||
closeAfterIdle time.Duration
|
||||
origin io.ReadWriteCloser
|
||||
eyeball DatagramWriter
|
||||
eyeball atomic.Pointer[DatagramConn]
|
||||
// activeAtChan is used to communicate the last read/write time
|
||||
activeAtChan chan time.Time
|
||||
closeChan chan error
|
||||
log *zerolog.Logger
|
||||
}
|
||||
|
||||
func NewSession(id RequestID, closeAfterIdle time.Duration, origin io.ReadWriteCloser, eyeball DatagramWriter, log *zerolog.Logger) Session {
|
||||
return &session{
|
||||
func NewSession(id RequestID, closeAfterIdle time.Duration, origin io.ReadWriteCloser, eyeball DatagramConn, log *zerolog.Logger) Session {
|
||||
session := &session{
|
||||
id: id,
|
||||
closeAfterIdle: closeAfterIdle,
|
||||
origin: origin,
|
||||
eyeball: eyeball,
|
||||
eyeball: atomic.Pointer[DatagramConn]{},
|
||||
// activeAtChan has low capacity. It can be full when there are many concurrent read/write. markActive() will
|
||||
// drop instead of blocking because last active time only needs to be an approximation
|
||||
activeAtChan: make(chan time.Time, 1),
|
||||
closeChan: make(chan error, 1),
|
||||
log: log,
|
||||
}
|
||||
session.eyeball.Store(&eyeball)
|
||||
return session
|
||||
}
|
||||
|
||||
func (s *session) ID() RequestID {
|
||||
return s.id
|
||||
}
|
||||
|
||||
// ConnectionID returns the ID reported by the eyeball connection currently attached to the session.
func (s *session) ConnectionID() uint8 {
|
||||
// Load the current eyeball atomically; NewSession stores the initial value before returning the session.
eyeball := *(s.eyeball.Load())
|
||||
return eyeball.ID()
|
||||
}
|
||||
|
||||
// Migrate attaches the session to the provided eyeball connection and restarts the idle timeout.
//
// The stored eyeball is only replaced when the provided connection reports a different ID than the current one; the
// session is marked active in either case.
//
// NOTE(review): the Load, compare and Store below are separate steps rather than one atomic operation; presumably
// Migrate is not called concurrently for the same session — confirm against callers.
func (s *session) Migrate(eyeball DatagramConn) {
|
||||
current := *(s.eyeball.Load())
|
||||
// Only migrate if the connection ids are different.
|
||||
if current.ID() != eyeball.ID() {
|
||||
s.eyeball.Store(&eyeball)
|
||||
}
|
||||
// The session is already running so we want to restart the idle timeout since no proxied packets have come down yet.
|
||||
s.markActive()
|
||||
}
|
||||
|
||||
func (s *session) Serve(ctx context.Context) error {
|
||||
go func() {
|
||||
// QUIC implementation copies data to another buffer before returning https://github.com/quic-go/quic-go/blob/v0.24.0/session.go#L1967-L1975
|
||||
@@ -107,9 +128,12 @@ func (s *session) Serve(ctx context.Context) error {
|
||||
s.log.Error().Int("packetSize", n).Msg("Session (origin) packet read was too large and was dropped")
|
||||
continue
|
||||
}
|
||||
// We need to load the eyeball atomically in case the connection was migrated. This should rarely be a point
|
||||
// of contention, as a migration can only happen during startup of a session before traffic flow.
|
||||
eyeball := *(s.eyeball.Load())
|
||||
// Sending a packet to the session does block on the [quic.Connection], however, this is okay because it
|
||||
// will cause back-pressure to the kernel buffer if the writes are not fast enough to the edge.
|
||||
err = s.eyeball.SendUDPSessionDatagram(readBuffer[:DatagramPayloadHeaderLen+n])
|
||||
err = eyeball.SendUDPSessionDatagram(readBuffer[:DatagramPayloadHeaderLen+n])
|
||||
if err != nil {
|
||||
s.closeChan <- err
|
||||
return
|
||||
@@ -137,6 +161,14 @@ func (s *session) Write(payload []byte) (n int, err error) {
|
||||
return n, err
|
||||
}
|
||||
|
||||
// ResetIdleTimer will restart the current idle timer.
|
||||
//
|
||||
// This public method is used to allow operators of sessions the ability to extend the session using information that is
|
||||
// known external to the session itself.
|
||||
func (s *session) ResetIdleTimer() {
|
||||
// Delegates to markActive, which notifies the idle checker loop without blocking.
s.markActive()
|
||||
}
|
||||
|
||||
// Sends the last active time to the idle checker loop without blocking. activeAtChan will only be full when there
|
||||
// are many concurrent read/write. It is fine to lose some precision
|
||||
func (s *session) markActive() {
|
||||
|
Reference in New Issue
Block a user