Add Client Manager to monitor connection state and trigger reconnect (#39)
- Support for exponential backoff on reconnect to be gentle on the server. - Clean up client by moving metrics and retry strategy to the connection manager. - Update echo_client to use client manager. - Fix echo client XMPP message matching. Fixes #21. Improvements for #8. disco_info_form
parent
6cdadc95e9
commit
2f391fde80
@ -0,0 +1,101 @@
|
||||
/*
|
||||
Interesting reference on backoff:
|
||||
- Exponential Backoff And Jitter (AWS Blog):
|
||||
https://www.awsarchitectureblog.com/2015/03/backoff.html
|
||||
|
||||
We use jitter by default for exponential backoff: the goal of this
module is not to provide precise 'ticks', but to space retries out so
that they help the server recover faster in case of congestion.
|
||||
|
||||
It can be used in several ways:
|
||||
- Using duration to get next sleep time.
|
||||
- Using ticker channel to trigger callback function on tick
|
||||
|
||||
The functions for Backoff are not thread-safe, but you can:
|
||||
- Keep the attempt counter on your end and use DurationForAttempt(int)
|
||||
- Use lock in your own code to protect the Backoff structure.
|
||||
|
||||
TODO: Implement Backoff Ticker channel
|
||||
TODO: Implement throttler interface. Throttler could be used to implement various reconnect strategies.
|
||||
*/
|
||||
|
||||
package xmpp // import "gosrc.io/xmpp"
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Defaults applied by Backoff.setDefault when the matching field is zero.
// All time values are expressed in milliseconds.
const (
	// defaultBase is the backoff base delay, in ms.
	defaultBase int = 20
	// defaultFactor is the multiplier applied per attempt.
	defaultFactor int = 2
	// defaultCap is the upper bound on the delay: 3 minutes, in ms.
	defaultCap int = 3 * 60 * 1000
)
|
||||
|
||||
// Backoff computes delays that grow with the number of attempts performed.
// It is used to apply exponential backoff to connection attempts, so that we
// do not hammer the server we are connecting to.
//
// The zero value is usable: zero-valued Base, Factor and Cap are replaced by
// package defaults the first time a duration is computed.
type Backoff struct {
	// NoJitter disables the random jitter applied to computed delays.
	NoJitter bool
	// Base is the initial delay, in milliseconds.
	Base int
	// Factor is the multiplier raised to the power of the attempt number.
	Factor int
	// Cap is the maximum delay, in milliseconds.
	Cap int
	// lastDuration is not read or written by the methods defined alongside
	// this type.
	lastDuration int
	// attempt counts the calls made to Duration since the last Reset.
	attempt int
}
|
||||
|
||||
// Duration returns the duration to apply to the current attempt.
|
||||
func (b *Backoff) Duration() time.Duration {
|
||||
d := b.DurationForAttempt(b.attempt)
|
||||
b.attempt++
|
||||
return d
|
||||
}
|
||||
|
||||
// Wait sleeps for backoff duration for current attempt.
|
||||
func (b *Backoff) Wait() {
|
||||
time.Sleep(b.Duration())
|
||||
}
|
||||
|
||||
// DurationForAttempt returns a duration for an attempt number, in a stateless way.
|
||||
func (b *Backoff) DurationForAttempt(attempt int) time.Duration {
|
||||
b.setDefault()
|
||||
expBackoff := math.Min(float64(b.Cap), float64(b.Base)*math.Pow(float64(b.Factor), float64(b.attempt)))
|
||||
d := int(math.Trunc(expBackoff))
|
||||
if !b.NoJitter {
|
||||
d = rand.Intn(d)
|
||||
}
|
||||
return time.Duration(d) * time.Millisecond
|
||||
}
|
||||
|
||||
// Reset sets back the number of attempts to 0. This is to be called after a successfull operation has been performed,
|
||||
// to reset the exponential backoff interval.
|
||||
func (b *Backoff) Reset() {
|
||||
b.attempt = 0
|
||||
}
|
||||
|
||||
func (b *Backoff) setDefault() {
|
||||
if b.Base == 0 {
|
||||
b.Base = defaultBase
|
||||
}
|
||||
|
||||
if b.Cap == 0 {
|
||||
b.Cap = defaultCap
|
||||
}
|
||||
|
||||
if b.Factor == 0 {
|
||||
b.Factor = defaultFactor
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
We use full jitter as default for now as it seems to provide good behaviour for reconnect.
|
||||
|
||||
Base is the default interval between attempts (if backoff Factor was equal to 1)
|
||||
|
||||
Attempt is the number of retries already performed for the operation. If attempts start at 0, the first sleep equals Base (before jitter is applied).
|
||||
|
||||
Cap is the maximum sleep time duration we tolerate between attempts
|
||||
*/
|
@ -0,0 +1,24 @@
|
||||
package xmpp_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"gosrc.io/xmpp"
|
||||
)
|
||||
|
||||
func TestDurationForAttempt_NoJitter(t *testing.T) {
|
||||
b := xmpp.Backoff{Base: 25, NoJitter: true}
|
||||
bInMS := time.Duration(b.Base) * time.Millisecond
|
||||
if b.DurationForAttempt(0) != bInMS {
|
||||
t.Errorf("incorrect default duration for attempt #0 (%d) = %d", b.DurationForAttempt(0)/time.Millisecond, bInMS/time.Millisecond)
|
||||
}
|
||||
var prevDuration, d time.Duration
|
||||
for i := 0; i < 10; i++ {
|
||||
d = b.DurationForAttempt(i)
|
||||
if !(d >= prevDuration) {
|
||||
t.Errorf("duration should be increasing between attempts. #%d (%d) > %d", i, d, prevDuration)
|
||||
}
|
||||
prevDuration = d
|
||||
}
|
||||
}
|
@ -0,0 +1,106 @@
|
||||
package xmpp // import "gosrc.io/xmpp"
|
||||
|
||||
import (
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// PostConnect is the signature of the callback that a ClientManager invokes
// with its client once a connection attempt has succeeded.
type PostConnect func(c *Client)
|
||||
|
||||
// ClientManager supervises an XMPP client connection. Its role is to handle connection events and
|
||||
// apply reconnection strategy.
|
||||
type ClientManager struct {
|
||||
Client *Client
|
||||
Session *Session
|
||||
PostConnect PostConnect
|
||||
|
||||
// Store low level metrics
|
||||
Metrics *Metrics
|
||||
}
|
||||
|
||||
// NewClientManager creates a new client manager structure, intended to support
|
||||
// handling XMPP client state event changes and auto-trigger reconnection
|
||||
// based on ClientManager configuration.
|
||||
func NewClientManager(client *Client, pc PostConnect) *ClientManager {
|
||||
return &ClientManager{
|
||||
Client: client,
|
||||
PostConnect: pc,
|
||||
}
|
||||
}
|
||||
|
||||
// Start launch the connection loop
|
||||
func (cm *ClientManager) Start() {
|
||||
cm.Client.Handler = func(e Event) {
|
||||
switch e.State {
|
||||
case StateConnected:
|
||||
cm.Metrics.setConnectTime()
|
||||
case StateSessionEstablished:
|
||||
cm.Metrics.setLoginTime()
|
||||
case StateDisconnected:
|
||||
// Reconnect on disconnection
|
||||
cm.connect()
|
||||
}
|
||||
}
|
||||
cm.connect()
|
||||
}
|
||||
|
||||
// Stop cancels pending operations and terminates existing XMPP client.
|
||||
func (cm *ClientManager) Stop() {
|
||||
// Remove on disconnect handler to avoid triggering reconnect
|
||||
cm.Client.Handler = nil
|
||||
cm.Client.Disconnect()
|
||||
}
|
||||
|
||||
// connect manages the reconnection loop and apply the define backoff to avoid overloading the server.
|
||||
func (cm *ClientManager) connect() {
|
||||
var backoff Backoff // TODO: Group backoff calculation features with connection manager?
|
||||
|
||||
for {
|
||||
var err error
|
||||
cm.Metrics = initMetrics()
|
||||
|
||||
if cm.Client.Session, err = cm.Client.Connect(); err != nil {
|
||||
log.Printf("Connection error: %v\n", err)
|
||||
backoff.Wait()
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if cm.PostConnect != nil {
|
||||
cm.PostConnect(cm.Client)
|
||||
}
|
||||
}
|
||||
|
||||
// Client Metrics
// ============================================================================

// Metrics holds low level timings measured for one connection attempt.
type Metrics struct {
	// startTime is the reference instant from which the durations below are
	// measured.
	startTime time.Time

	// ConnectTime is the duration between the client initiating the TCP/IP
	// connection to the server and the actual TCP/IP session establishment.
	// This time includes DNS resolution and can be slightly higher if the
	// DNS resolution result was not in cache.
	ConnectTime time.Duration

	// LoginTime is the duration between the client initiating the TCP/IP
	// connection to the server and the return of the login result. This
	// includes ConnectTime, but also XMPP level protocol negotiation such
	// as starttls.
	LoginTime time.Duration
}
|
||||
|
||||
// initMetrics set metrics with default value and define the starting point
|
||||
// for duration calculation (connect time, login time, etc).
|
||||
func initMetrics() *Metrics {
|
||||
return &Metrics{
|
||||
startTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Metrics) setConnectTime() {
|
||||
m.ConnectTime = time.Since(m.startTime)
|
||||
}
|
||||
|
||||
func (m *Metrics) setLoginTime() {
|
||||
m.LoginTime = time.Since(m.startTime)
|
||||
}
|
Loading…
Reference in new issue