TUN-3593: /ready endpoint for k8s readiness. Move tunnel events out of UI package, into connection package.

This commit is contained in:
Adam Chalmers
2020-11-30 14:05:37 -06:00
parent bda8fe2fbe
commit 38fb0b28b6
12 changed files with 259 additions and 99 deletions

View File

@@ -12,6 +12,7 @@ import (
"golang.org/x/net/trace"
"github.com/cloudflare/cloudflared/connection"
"github.com/cloudflare/cloudflared/logger"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -22,22 +23,35 @@ const (
startupTime = time.Millisecond * 500
)
func ServeMetrics(l net.Listener, shutdownC <-chan struct{}, logger logger.Service) (err error) {
// newMetricsHandler builds the HTTP router for the metrics listener. It
// exposes three routes:
//
//   - /metrics      Prometheus metrics via promhttp.
//   - /healthcheck  always answers "OK\n".
//   - /ready        readiness status derived from connectionEvents.
//
// A dedicated ServeMux is returned so that nothing is registered on the
// process-wide default mux.
func newMetricsHandler(connectionEvents <-chan connection.Event, log logger.Service) *http.ServeMux {
	healthcheck := func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintf(w, "OK\n")
	}

	router := http.NewServeMux()
	router.Handle("/metrics", promhttp.Handler())
	router.Handle("/healthcheck", http.HandlerFunc(healthcheck))
	router.Handle("/ready", NewReadyServer(connectionEvents, log))
	return router
}
func ServeMetrics(
l net.Listener,
shutdownC <-chan struct{},
connectionEvents <-chan connection.Event,
logger logger.Service,
) (err error) {
var wg sync.WaitGroup
// Metrics port is privileged, so no need for further access control
trace.AuthRequest = func(*http.Request) (bool, bool) { return true, true }
// TODO: parameterize ReadTimeout and WriteTimeout. The maximum time we can
// profile CPU usage depends on WriteTimeout
h := newMetricsHandler(connectionEvents, logger)
server := &http.Server{
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
Handler: h,
}
http.Handle("/metrics", promhttp.Handler())
http.Handle("/healthcheck", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "OK\n")
}))
wg.Add(1)
go func() {
defer wg.Done()

80
metrics/readiness.go Normal file
View File

@@ -0,0 +1,80 @@
package metrics
import (
"encoding/json"
"fmt"
"net/http"
"sync"
conn "github.com/cloudflare/cloudflared/connection"
"github.com/cloudflare/cloudflared/logger"
)
// ReadyServer serves HTTP 200 if the tunnel can serve traffic. Intended for k8s readiness checks.
//
// The embedded RWMutex guards isConnected: the event-listening goroutine
// started by NewReadyServer takes the write lock to update it, and
// makeResponse takes the read lock to inspect it.
type ReadyServer struct {
sync.RWMutex
// isConnected maps a connection index to whether the most recent event
// seen for that index was a Connected event.
isConnected map[int]bool
// log receives an error message when an unrecognised event type arrives.
log logger.Service
}
// NewReadyServer builds a ReadyServer and spawns a goroutine that consumes
// connectionEvents to keep the per-connection state current. The goroutine
// exits once connectionEvents is closed.
//
// Event handling:
//   - Connected marks the connection index as ready.
//   - Disconnected, Reconnecting and RegisteringTunnel mark it as not ready.
//   - SetURL is ignored.
//   - Anything else is reported through log as an error.
func NewReadyServer(connectionEvents <-chan conn.Event, log logger.Service) *ReadyServer {
	server := &ReadyServer{
		isConnected: make(map[int]bool, 0),
		log:         log,
	}

	// record stores the readiness of one connection under the write lock.
	record := func(index int, ready bool) {
		server.Lock()
		server.isConnected[index] = ready
		server.Unlock()
	}

	go func() {
		for event := range connectionEvents {
			switch event.EventType {
			case conn.SetURL:
				// Carries no readiness information.
			case conn.Connected:
				record(int(event.Index), true)
			case conn.Disconnected, conn.Reconnecting, conn.RegisteringTunnel:
				record(int(event.Index), false)
			default:
				server.log.Errorf("Unknown connection event case %v", event)
			}
		}
	}()

	return server
}
// body is the JSON payload that ServeHTTP writes in response to a readiness
// request.
type body struct {
// Status is the same HTTP status code that is sent in the response header.
Status int `json:"status"`
// ReadyConnections is the number of connections currently marked as connected.
ReadyConnections int `json:"readyConnections"`
}
// ServeHTTP responds with HTTP 200 if the tunnel is connected to the edge,
// and HTTP 503 otherwise. The response body is a JSON object carrying the
// status code and the number of ready connections.
//
// The status code is always the one computed by makeResponse, even if the
// body cannot be encoded, because readiness probes act on the status code
// rather than on the payload.
func (rs *ReadyServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	statusCode, readyConnections := rs.makeResponse()

	// Encode before touching the response so the error path and the happy
	// path are cleanly separated.
	msg, err := json.Marshal(body{
		Status:           statusCode,
		ReadyConnections: readyConnections,
	})

	// Headers must be set before WriteHeader; without an explicit type
	// net/http would sniff the JSON payload as text/plain.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(statusCode)

	if err != nil {
		fmt.Fprintf(w, `{"error": "%s"}`, err)
		// Do not fall through and write the (nil) message after the error.
		return
	}

	// A failed write means the client has gone away; there is nothing
	// useful left to do for a readiness probe.
	_, _ = w.Write(msg)
}
// makeResponse computes what ServeHTTP should report: the HTTP status code
// and how many connections are currently marked as connected. It is kept
// separate from ServeHTTP so it can be unit tested without an HTTP round trip.
//
// The status is http.StatusOK when at least one connection is connected and
// http.StatusServiceUnavailable otherwise.
func (rs *ReadyServer) makeResponse() (statusCode, readyConnections int) {
	rs.RLock()
	defer rs.RUnlock()

	for _, up := range rs.isConnected {
		if !up {
			continue
		}
		readyConnections++
	}

	if readyConnections == 0 {
		return http.StatusServiceUnavailable, 0
	}
	return http.StatusOK, readyConnections
}

58
metrics/readiness_test.go Normal file
View File

@@ -0,0 +1,58 @@
package metrics
import (
"net/http"
"testing"
)
// TestReadyServer_makeResponse checks that makeResponse reports HTTP 200 if
// and only if at least one connection is marked connected, and that it
// counts the connected entries correctly.
func TestReadyServer_makeResponse(t *testing.T) {
	type fields struct {
		isConnected map[int]bool
	}
	tests := []struct {
		name                 string
		fields               fields
		wantOK               bool
		wantReadyConnections int
	}{
		{
			name: "One connection online => HTTP 200",
			fields: fields{
				isConnected: map[int]bool{
					0: false,
					1: false,
					2: true,
					3: false,
				},
			},
			wantOK:               true,
			wantReadyConnections: 1,
		},
		{
			name: "No connections online => no HTTP 200",
			fields: fields{
				isConnected: map[int]bool{
					0: false,
					1: false,
					2: false,
					3: false,
				},
			},
			wantReadyConnections: 0,
		},
		{
			name: "No connections registered => no HTTP 200",
			fields: fields{
				isConnected: map[int]bool{},
			},
			wantReadyConnections: 0,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			rs := &ReadyServer{
				isConnected: tt.fields.isConnected,
			}
			gotStatusCode, gotReadyConnections := rs.makeResponse()
			// Compare in both directions so that an unexpected 200 in a
			// "not ready" case fails the test as well.
			if gotOK := gotStatusCode == http.StatusOK; gotOK != tt.wantOK {
				t.Errorf("ReadyServer.makeResponse() gotStatusCode = %v, want ok = %v", gotStatusCode, tt.wantOK)
			}
			if gotReadyConnections != tt.wantReadyConnections {
				t.Errorf("ReadyServer.makeResponse() gotReadyConnections = %v, want %v", gotReadyConnections, tt.wantReadyConnections)
			}
		})
	}
}