TUN-5261: Collect QUIC metrics about RTT, packets and bytes transfered and log events at tracing level

This commit is contained in:
cthuang
2021-10-15 12:05:54 +01:00
parent 958650be1f
commit ff7c48568c
61 changed files with 12360 additions and 13 deletions

99
quic/conversion.go Normal file
View File

@@ -0,0 +1,99 @@
package quic
import (
"strconv"
"time"
"github.com/lucas-clemente/quic-go/logging"
)
func perspectiveString(p logging.Perspective) string {
switch p {
case logging.PerspectiveClient:
return "client"
case logging.PerspectiveServer:
return "server"
default:
return ""
}
}
// Helper to convert logging.ByteCount(alias for int64) to float64 used in prometheus
func byteCountToPromCount(count logging.ByteCount) float64 {
return float64(count)
}
// Helper to convert Duration to float64 used in prometheus
func durationToPromGauge(duration time.Duration) float64 {
return float64(duration.Milliseconds())
}
// Helper to convert https://pkg.go.dev/github.com/lucas-clemente/quic-go@v0.23.0/logging#PacketType into string
func packetTypeString(pt logging.PacketType) string {
switch pt {
case logging.PacketTypeInitial:
return "initial"
case logging.PacketTypeHandshake:
return "handshake"
case logging.PacketTypeRetry:
return "retry"
case logging.PacketType0RTT:
return "0_rtt"
case logging.PacketTypeVersionNegotiation:
return "version_negotiation"
case logging.PacketType1RTT:
return "1_rtt"
case logging.PacketTypeStatelessReset:
return "stateless_reset"
case logging.PacketTypeNotDetermined:
return "undetermined"
default:
return "unknown_packet_type"
}
}
// Helper to convert https://pkg.go.dev/github.com/lucas-clemente/quic-go@v0.23.0/logging#PacketDropReason into string
func packetDropReasonString(reason logging.PacketDropReason) string {
switch reason {
case logging.PacketDropKeyUnavailable:
return "key_unavailable"
case logging.PacketDropUnknownConnectionID:
return "unknown_conn_id"
case logging.PacketDropHeaderParseError:
return "header_parse_err"
case logging.PacketDropPayloadDecryptError:
return "payload_decrypt_err"
case logging.PacketDropProtocolViolation:
return "protocol_violation"
case logging.PacketDropDOSPrevention:
return "dos_prevention"
case logging.PacketDropUnsupportedVersion:
return "unsupported_version"
case logging.PacketDropUnexpectedPacket:
return "unexpected_packet"
case logging.PacketDropUnexpectedSourceConnectionID:
return "unexpected_src_conn_id"
case logging.PacketDropUnexpectedVersion:
return "unexpected_version"
case logging.PacketDropDuplicate:
return "duplicate"
default:
return "unknown_reason"
}
}
// Helper to convert https://pkg.go.dev/github.com/lucas-clemente/quic-go@v0.23.0/logging#PacketLossReason into string
func packetLossReasonString(reason logging.PacketLossReason) string {
switch reason {
case logging.PacketLossReorderingThreshold:
return "reordering"
case logging.PacketLossTimeThreshold:
return "timeout"
default:
return "unknown_loss_reason"
}
}
func uint8ToString(input uint8) string {
return strconv.FormatUint(uint64(input), 10)
}

373
quic/metrics.go Normal file
View File

@@ -0,0 +1,373 @@
package quic
import (
"sync"
"github.com/lucas-clemente/quic-go/logging"
"github.com/prometheus/client_golang/prometheus"
)
const (
namespace = "quic"
)
var (
clientConnLabels = []string{"conn_index"}
clientMetrics = struct {
totalConnections prometheus.Counter
closedConnections *prometheus.CounterVec
sentPackets *prometheus.CounterVec
sentBytes *prometheus.CounterVec
receivePackets *prometheus.CounterVec
receiveBytes *prometheus.CounterVec
bufferedPackets *prometheus.CounterVec
droppedPackets *prometheus.CounterVec
lostPackets *prometheus.CounterVec
minRTT *prometheus.GaugeVec
latestRTT *prometheus.GaugeVec
smoothedRTT *prometheus.GaugeVec
}{
totalConnections: prometheus.NewCounter(
totalConnectionsOpts(logging.PerspectiveClient),
),
closedConnections: prometheus.NewCounterVec(
closedConnectionsOpts(logging.PerspectiveClient),
[]string{"error"},
),
sentPackets: prometheus.NewCounterVec(
sentPacketsOpts(logging.PerspectiveClient),
clientConnLabels,
),
sentBytes: prometheus.NewCounterVec(
sentBytesOpts(logging.PerspectiveClient),
clientConnLabels,
),
receivePackets: prometheus.NewCounterVec(
receivePacketsOpts(logging.PerspectiveClient),
clientConnLabels,
),
receiveBytes: prometheus.NewCounterVec(
receiveBytesOpts(logging.PerspectiveClient),
clientConnLabels,
),
bufferedPackets: prometheus.NewCounterVec(
bufferedPacketsOpts(logging.PerspectiveClient),
append(clientConnLabels, "packet_type"),
),
droppedPackets: prometheus.NewCounterVec(
droppedPacketsOpts(logging.PerspectiveClient),
append(clientConnLabels, "packet_type", "reason"),
),
lostPackets: prometheus.NewCounterVec(
lostPacketsOpts(logging.PerspectiveClient),
append(clientConnLabels, "reason"),
),
minRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Name: "min_rtt",
Help: "Lowest RTT measured on a connection in millisec",
},
clientConnLabels,
),
latestRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Name: "latest_rtt",
Help: "Latest RTT measured on a connection",
},
clientConnLabels,
),
smoothedRTT: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveClient),
Name: "smoothed_rtt",
Help: "Calculated smoothed RTT measured on a connection in millisec",
},
clientConnLabels,
),
}
// The server has many QUIC connections. Adding per connection label incurs high memory cost
serverMetrics = struct {
totalConnections prometheus.Counter
closedConnections prometheus.Counter
sentPackets prometheus.Counter
sentBytes prometheus.Counter
receivePackets prometheus.Counter
receiveBytes prometheus.Counter
bufferedPackets *prometheus.CounterVec
droppedPackets *prometheus.CounterVec
lostPackets *prometheus.CounterVec
rtt prometheus.Histogram
}{
totalConnections: prometheus.NewCounter(
totalConnectionsOpts(logging.PerspectiveServer),
),
closedConnections: prometheus.NewCounter(
closedConnectionsOpts(logging.PerspectiveServer),
),
sentPackets: prometheus.NewCounter(
sentPacketsOpts(logging.PerspectiveServer),
),
sentBytes: prometheus.NewCounter(
sentBytesOpts(logging.PerspectiveServer),
),
receivePackets: prometheus.NewCounter(
receivePacketsOpts(logging.PerspectiveServer),
),
receiveBytes: prometheus.NewCounter(
receiveBytesOpts(logging.PerspectiveServer),
),
bufferedPackets: prometheus.NewCounterVec(
bufferedPacketsOpts(logging.PerspectiveServer),
[]string{"packet_type"},
),
droppedPackets: prometheus.NewCounterVec(
droppedPacketsOpts(logging.PerspectiveServer),
[]string{"packet_type", "reason"},
),
lostPackets: prometheus.NewCounterVec(
lostPacketsOpts(logging.PerspectiveServer),
[]string{"reason"},
),
rtt: prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: perspectiveString(logging.PerspectiveServer),
Name: "rtt",
Buckets: []float64{5, 10, 20, 30, 40, 50, 75, 100},
},
),
}
registerClient = sync.Once{}
registerServer = sync.Once{}
)
// MetricsCollector abstracts the difference between client and server metrics from connTracer
type MetricsCollector interface {
startedConnection()
closedConnection(err error)
sentPackets(logging.ByteCount)
receivedPackets(logging.ByteCount)
bufferedPackets(logging.PacketType)
droppedPackets(logging.PacketType, logging.ByteCount, logging.PacketDropReason)
lostPackets(logging.PacketLossReason)
updatedRTT(*logging.RTTStats)
}
func totalConnectionsOpts(p logging.Perspective) prometheus.CounterOpts {
var help string
if p == logging.PerspectiveClient {
help = "Number of connections initiated. For all quic metrics, client means the side initiating the connection"
} else {
help = "Number of connections accepted. For all quic metrics, server means the side accepting connections"
}
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "total_connections",
Help: help,
}
}
func closedConnectionsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "closed_connections",
Help: "Number of connections that has been closed",
}
}
func sentPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "sent_packets",
Help: "Number of packets that have been sent through a connection",
}
}
func sentBytesOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "sent_bytes",
Help: "Number of bytes that have been sent through a connection",
}
}
func receivePacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "receive_packets",
Help: "Number of packets that have been received through a connection",
}
}
func receiveBytesOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "receive_bytes",
Help: "Number of bytes that have been received through a connection",
}
}
func bufferedPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "buffered_packets",
Help: "Number of bytes that have been buffered on a connection",
}
}
func droppedPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "dropped_packets",
Help: "Number of bytes that have been dropped on a connection",
}
}
func lostPacketsOpts(p logging.Perspective) prometheus.CounterOpts {
return prometheus.CounterOpts{
Namespace: namespace,
Subsystem: perspectiveString(p),
Name: "lost_packets",
Help: "Number of packets that have been lost from a connection",
}
}
type clientCollector struct {
index string
}
func newClientCollector(index uint8) MetricsCollector {
registerClient.Do(func() {
prometheus.MustRegister(
clientMetrics.totalConnections,
clientMetrics.closedConnections,
clientMetrics.sentPackets,
clientMetrics.sentBytes,
clientMetrics.receivePackets,
clientMetrics.receiveBytes,
clientMetrics.bufferedPackets,
clientMetrics.droppedPackets,
clientMetrics.lostPackets,
clientMetrics.minRTT,
clientMetrics.latestRTT,
clientMetrics.smoothedRTT,
)
})
return &clientCollector{
index: uint8ToString(index),
}
}
func (cc *clientCollector) startedConnection() {
clientMetrics.totalConnections.Inc()
}
func (cc *clientCollector) closedConnection(err error) {
clientMetrics.closedConnections.WithLabelValues(err.Error()).Inc()
}
func (cc *clientCollector) sentPackets(size logging.ByteCount) {
clientMetrics.sentPackets.WithLabelValues(cc.index).Inc()
clientMetrics.sentBytes.WithLabelValues(cc.index).Add(byteCountToPromCount(size))
}
func (cc *clientCollector) receivedPackets(size logging.ByteCount) {
clientMetrics.receivePackets.WithLabelValues(cc.index).Inc()
clientMetrics.receiveBytes.WithLabelValues(cc.index).Add(byteCountToPromCount(size))
}
func (cc *clientCollector) bufferedPackets(packetType logging.PacketType) {
clientMetrics.bufferedPackets.WithLabelValues(cc.index, packetTypeString(packetType)).Inc()
}
func (cc *clientCollector) droppedPackets(packetType logging.PacketType, size logging.ByteCount, reason logging.PacketDropReason) {
clientMetrics.droppedPackets.WithLabelValues(
cc.index,
packetTypeString(packetType),
packetDropReasonString(reason),
).Add(byteCountToPromCount(size))
}
func (cc *clientCollector) lostPackets(reason logging.PacketLossReason) {
clientMetrics.lostPackets.WithLabelValues(cc.index, packetLossReasonString(reason)).Inc()
}
func (cc *clientCollector) updatedRTT(rtt *logging.RTTStats) {
clientMetrics.minRTT.WithLabelValues(cc.index).Set(durationToPromGauge(rtt.MinRTT()))
clientMetrics.latestRTT.WithLabelValues(cc.index).Set(durationToPromGauge(rtt.LatestRTT()))
clientMetrics.smoothedRTT.WithLabelValues(cc.index).Set(durationToPromGauge(rtt.SmoothedRTT()))
}
type serverCollector struct{}
func newServiceCollector() MetricsCollector {
registerServer.Do(func() {
prometheus.MustRegister(
serverMetrics.totalConnections,
serverMetrics.closedConnections,
serverMetrics.sentPackets,
serverMetrics.sentBytes,
serverMetrics.receivePackets,
serverMetrics.receiveBytes,
serverMetrics.bufferedPackets,
serverMetrics.droppedPackets,
serverMetrics.lostPackets,
serverMetrics.rtt,
)
})
return &serverCollector{}
}
func (sc *serverCollector) startedConnection() {
serverMetrics.totalConnections.Inc()
}
func (sc *serverCollector) closedConnection(err error) {
serverMetrics.closedConnections.Inc()
}
func (sc *serverCollector) sentPackets(size logging.ByteCount) {
serverMetrics.sentPackets.Inc()
serverMetrics.sentBytes.Add(byteCountToPromCount(size))
}
func (sc *serverCollector) receivedPackets(size logging.ByteCount) {
serverMetrics.receivePackets.Inc()
serverMetrics.receiveBytes.Add(byteCountToPromCount(size))
}
func (sc *serverCollector) bufferedPackets(packetType logging.PacketType) {
serverMetrics.bufferedPackets.WithLabelValues(packetTypeString(packetType)).Inc()
}
func (sc *serverCollector) droppedPackets(packetType logging.PacketType, size logging.ByteCount, reason logging.PacketDropReason) {
serverMetrics.droppedPackets.WithLabelValues(
packetTypeString(packetType),
packetDropReasonString(reason),
).Add(byteCountToPromCount(size))
}
func (sc *serverCollector) lostPackets(reason logging.PacketLossReason) {
serverMetrics.lostPackets.WithLabelValues(packetLossReasonString(reason)).Inc()
}
func (sc *serverCollector) updatedRTT(rtt *logging.RTTStats) {
latestRTT := rtt.LatestRTT()
// May return 0 if no valid updates have occured
if latestRTT > 0 {
serverMetrics.rtt.Observe(durationToPromGauge(latestRTT))
}
}

126
quic/tracing.go Normal file
View File

@@ -0,0 +1,126 @@
package quic
import (
"context"
"net"
"github.com/lucas-clemente/quic-go/logging"
"github.com/lucas-clemente/quic-go/qlog"
"github.com/rs/zerolog"
)
// QUICTracer is a wrapper to create new quicConnTracer
type tracer struct {
logger *zerolog.Logger
config *tracerConfig
}
type tracerConfig struct {
isClient bool
// Only client has an index
index uint8
}
func NewClientTracer(logger *zerolog.Logger, index uint8) logging.Tracer {
return &tracer{
logger: logger,
config: &tracerConfig{
isClient: true,
index: index,
},
}
}
func NewServerTracer(logger *zerolog.Logger) logging.Tracer {
return &tracer{
logger: logger,
config: &tracerConfig{
isClient: false,
},
}
}
func (t *tracer) TracerForConnection(_ context.Context, p logging.Perspective, odcid logging.ConnectionID) logging.ConnectionTracer {
connID := logging.ConnectionID(odcid).String()
ql := &quicLogger{
logger: t.logger,
connectionID: connID,
}
if t.config.isClient {
return newConnTracer(ql, p, odcid, newClientCollector(t.config.index))
}
return newConnTracer(ql, p, odcid, newServiceCollector())
}
func (*tracer) SentPacket(net.Addr, *logging.Header, logging.ByteCount, []logging.Frame) {}
func (*tracer) DroppedPacket(net.Addr, logging.PacketType, logging.ByteCount, logging.PacketDropReason) {
}
// connTracer is a wrapper around https://pkg.go.dev/github.com/lucas-clemente/quic-go@v0.23.0/qlog#NewConnectionTracer to collect metrics
type connTracer struct {
logging.ConnectionTracer
metricsCollector MetricsCollector
connectionID string
}
func newConnTracer(ql *quicLogger, p logging.Perspective, odcid logging.ConnectionID, metricsCollector MetricsCollector) logging.ConnectionTracer {
return &connTracer{
qlog.NewConnectionTracer(ql, p, odcid),
metricsCollector,
logging.ConnectionID(odcid).String(),
}
}
func (ct *connTracer) StartedConnection(local, remote net.Addr, srcConnID, destConnID logging.ConnectionID) {
ct.metricsCollector.startedConnection()
ct.ConnectionTracer.StartedConnection(local, remote, srcConnID, destConnID)
}
func (ct *connTracer) ClosedConnection(err error) {
ct.metricsCollector.closedConnection(err)
ct.ConnectionTracer.ClosedConnection(err)
}
func (ct *connTracer) SentPacket(hdr *logging.ExtendedHeader, packetSize logging.ByteCount, ack *logging.AckFrame, frames []logging.Frame) {
ct.metricsCollector.sentPackets(packetSize)
ct.ConnectionTracer.SentPacket(hdr, packetSize, ack, frames)
}
func (ct *connTracer) ReceivedPacket(hdr *logging.ExtendedHeader, size logging.ByteCount, frames []logging.Frame) {
ct.metricsCollector.receivedPackets(size)
ct.ConnectionTracer.ReceivedPacket(hdr, size, frames)
}
func (ct *connTracer) BufferedPacket(pt logging.PacketType) {
ct.metricsCollector.bufferedPackets(pt)
ct.ConnectionTracer.BufferedPacket(pt)
}
func (ct *connTracer) DroppedPacket(pt logging.PacketType, size logging.ByteCount, reason logging.PacketDropReason) {
ct.metricsCollector.droppedPackets(pt, size, reason)
ct.ConnectionTracer.DroppedPacket(pt, size, reason)
}
func (ct *connTracer) LostPacket(level logging.EncryptionLevel, number logging.PacketNumber, reason logging.PacketLossReason) {
ct.metricsCollector.lostPackets(reason)
ct.ConnectionTracer.LostPacket(level, number, reason)
}
func (ct *connTracer) UpdatedMetrics(rttStats *logging.RTTStats, cwnd, bytesInFlight logging.ByteCount, packetsInFlight int) {
ct.metricsCollector.updatedRTT(rttStats)
ct.ConnectionTracer.UpdatedMetrics(rttStats, cwnd, bytesInFlight, packetsInFlight)
}
type quicLogger struct {
logger *zerolog.Logger
connectionID string
}
func (qt *quicLogger) Write(p []byte) (n int, err error) {
qt.logger.Trace().Str("quicConnection", qt.connectionID).RawJSON("event", p).Msg("Quic event")
return len(p), nil
}
func (*quicLogger) Close() error {
return nil
}