TUN-9319: Add dynamic loading of features to connections via ConnectionOptionsSnapshot

Make sure to enforce snapshots of features and client information for each connection
so that the feature information can change in the background. This allows for new features
to only be applied to a connection if it completely disconnects and attempts a reconnect.

Updates the feature refresh time to 1 hour from previous cloudflared versions which
refreshed every 6 hours.

Closes TUN-9319
This commit is contained in:
Devin Carr
2025-05-14 20:11:05 +00:00
parent 02705c44b2
commit 3bf9217de5
14 changed files with 359 additions and 106 deletions

View File

@@ -7,6 +7,7 @@ import (
"hash/fnv"
"net"
"slices"
"sync"
"time"
"github.com/rs/zerolog"
@@ -15,22 +16,29 @@ import (
const (
featureSelectorHostname = "cfd-features.argotunnel.com"
lookupTimeout = time.Second * 10
defaultLookupFreq = time.Hour
)
// If the TXT record adds other fields, the umarshal logic will ignore those keys
// If the TXT record is missing a key, the field will unmarshal to the default Go value
type featuresRecord struct {
DatagramV3Percentage uint32 `json:"dv3_1"`
// DatagramV3Percentage int32 `json:"dv3"` // Removed in TUN-9291
// PostQuantumPercentage int32 `json:"pq"` // Removed in TUN-7970
}
func NewFeatureSelector(ctx context.Context, accountTag string, cliFeatures []string, pq bool, logger *zerolog.Logger) (*FeatureSelector, error) {
return newFeatureSelector(ctx, accountTag, logger, newDNSResolver(), cliFeatures, pq)
func NewFeatureSelector(ctx context.Context, accountTag string, cliFeatures []string, pq bool, logger *zerolog.Logger) (FeatureSelector, error) {
return newFeatureSelector(ctx, accountTag, logger, newDNSResolver(), cliFeatures, pq, defaultLookupFreq)
}
type FeatureSelector interface {
Snapshot() FeatureSnapshot
}
// FeatureSelector determines if this account will try new features; loaded once during startup.
type FeatureSelector struct {
type featureSelector struct {
accountHash uint32
logger *zerolog.Logger
resolver resolver
@@ -38,10 +46,12 @@ type FeatureSelector struct {
staticFeatures staticFeatures
cliFeatures []string
features featuresRecord
// lock protects concurrent access to dynamic features
lock sync.RWMutex
remoteFeatures featuresRecord
}
func newFeatureSelector(ctx context.Context, accountTag string, logger *zerolog.Logger, resolver resolver, cliFeatures []string, pq bool) (*FeatureSelector, error) {
func newFeatureSelector(ctx context.Context, accountTag string, logger *zerolog.Logger, resolver resolver, cliFeatures []string, pq bool, refreshFreq time.Duration) (*featureSelector, error) {
// Combine default features and user-provided features
var pqMode *PostQuantumMode
if pq {
@@ -52,7 +62,7 @@ func newFeatureSelector(ctx context.Context, accountTag string, logger *zerolog.
staticFeatures := staticFeatures{
PostQuantumMode: pqMode,
}
selector := &FeatureSelector{
selector := &featureSelector{
accountHash: switchThreshold(accountTag),
logger: logger,
resolver: resolver,
@@ -60,14 +70,32 @@ func newFeatureSelector(ctx context.Context, accountTag string, logger *zerolog.
cliFeatures: dedupAndRemoveFeatures(cliFeatures),
}
if err := selector.init(ctx); err != nil {
// Load the remote features
if err := selector.refresh(ctx); err != nil {
logger.Err(err).Msg("Failed to fetch features, default to disable")
}
// Spin off reloading routine
go selector.refreshLoop(ctx, refreshFreq)
return selector, nil
}
func (fs *FeatureSelector) PostQuantumMode() PostQuantumMode {
func (fs *featureSelector) Snapshot() FeatureSnapshot {
fs.lock.RLock()
defer fs.lock.RUnlock()
return FeatureSnapshot{
PostQuantum: fs.postQuantumMode(),
DatagramVersion: fs.datagramVersion(),
FeaturesList: fs.clientFeatures(),
}
}
func (fs *featureSelector) accountEnabled(percentage uint32) bool {
return percentage > fs.accountHash
}
func (fs *featureSelector) postQuantumMode() PostQuantumMode {
if fs.staticFeatures.PostQuantumMode != nil {
return *fs.staticFeatures.PostQuantumMode
}
@@ -75,7 +103,7 @@ func (fs *FeatureSelector) PostQuantumMode() PostQuantumMode {
return PostQuantumPrefer
}
func (fs *FeatureSelector) DatagramVersion() DatagramVersion {
func (fs *featureSelector) datagramVersion() DatagramVersion {
// If user provides the feature via the cli, we take it as priority over remote feature evaluation
if slices.Contains(fs.cliFeatures, FeatureDatagramV3_1) {
return DatagramV3
@@ -85,16 +113,20 @@ func (fs *FeatureSelector) DatagramVersion() DatagramVersion {
return DatagramV2
}
if fs.accountEnabled(fs.remoteFeatures.DatagramV3Percentage) {
return DatagramV3
}
return DatagramV2
}
// ClientFeatures will return the list of currently available features that cloudflared should provide to the edge.
func (fs *FeatureSelector) ClientFeatures() []string {
// clientFeatures will return the list of currently available features that cloudflared should provide to the edge.
func (fs *featureSelector) clientFeatures() []string {
// Evaluate any remote features along with static feature list to construct the list of features
return dedupAndRemoveFeatures(slices.Concat(defaultFeatures, fs.cliFeatures, []string{string(fs.DatagramVersion())}))
return dedupAndRemoveFeatures(slices.Concat(defaultFeatures, fs.cliFeatures, []string{string(fs.datagramVersion())}))
}
func (fs *FeatureSelector) init(ctx context.Context) error {
func (fs *featureSelector) refresh(ctx context.Context) error {
record, err := fs.resolver.lookupRecord(ctx)
if err != nil {
return err
@@ -105,11 +137,29 @@ func (fs *FeatureSelector) init(ctx context.Context) error {
return err
}
fs.features = features
fs.lock.Lock()
defer fs.lock.Unlock()
fs.remoteFeatures = features
return nil
}
func (fs *featureSelector) refreshLoop(ctx context.Context, refreshFreq time.Duration) {
ticker := time.NewTicker(refreshFreq)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
err := fs.refresh(ctx)
if err != nil {
fs.logger.Err(err).Msg("Failed to refresh feature selector")
}
}
}
}
// resolver represents an object that can look up featuresRecord
type resolver interface {
lookupRecord(ctx context.Context) ([]byte, error)