zero: add more verbose logging about background control loops

This commit is contained in:
Denis Mishin 2023-12-05 10:48:01 -05:00
parent d2b2ad3250
commit 55cce29741
5 changed files with 57 additions and 17 deletions

View file

@ -61,6 +61,7 @@ func newConfig(opts ...Option) ([]watch, backoff.BackOff) {
bo := backoff.NewExponentialBackOff()
bo.MaxInterval = cfg.maxInterval
bo.MaxElapsedTime = 0
bo.Multiplier = 2
return cfg.watches, bo
}

View file

@ -7,6 +7,10 @@ import (
"fmt"
"reflect"
"time"
"github.com/rs/zerolog"
"github.com/pomerium/pomerium/internal/log"
)
// Retry retries a function (with exponential back-off) until it succeeds.
@ -20,6 +24,10 @@ func Retry(
fn func(context.Context) error,
opts ...Option,
) error {
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
return c.Str("control-loop", name)
})
watches, backoff := newConfig(opts...)
ticker := time.NewTicker(backoff.NextBackOff())
defer ticker.Stop()
@ -32,16 +40,22 @@ restart:
if err == nil {
return nil
}
if IsTerminalError(err) {
log.Ctx(ctx).Error().Err(err).Msg("terminal error")
return err
}
log.Ctx(ctx).Warn().Msg(err.Error())
backoff.Reset()
backoff:
for {
ticker.Reset(backoff.NextBackOff())
interval := backoff.NextBackOff()
ticker.Reset(interval)
log.Ctx(ctx).Warn().Msgf("backing off for %s...", interval.String())
next, err := s.Exec(ctx)
logNext(ctx, next, err)
switch next {
case nextRestart:
continue restart
@ -56,6 +70,24 @@ restart:
}
}
func logNext(ctx context.Context, next next, err error) {
evt := log.Ctx(ctx).Info()
if err != nil {
evt = log.Ctx(ctx).Warn().Err(err)
}
switch next {
case nextRestart:
evt.Msg("retrying...")
case nextBackoff:
evt.Msg("will retry after backoff")
case nextExit:
evt.Msg("exiting")
default:
evt.Msg("unknown next state")
}
}
type selectCase struct {
watches []watch
cases []reflect.SelectCase
@ -132,7 +164,7 @@ func onError(w watch, err error) (next, error) {
}
if w.this {
return nextBackoff, fmt.Errorf("retry %s failed: %w", w.name, err)
return nextBackoff, err
}
panic("unreachable, as watches are wrapped in retries and may only return terminal errors")

View file

@ -6,10 +6,12 @@ import (
"errors"
"fmt"
"github.com/rs/zerolog"
"golang.org/x/sync/errgroup"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/zero/bootstrap"
"github.com/pomerium/pomerium/internal/zero/reconciler"
"github.com/pomerium/pomerium/pkg/cmd/pomerium"
"github.com/pomerium/pomerium/pkg/grpc/databroker"
sdk "github.com/pomerium/zero-sdk"
@ -39,7 +41,7 @@ func Run(ctx context.Context, opts ...Option) error {
eg.Go(func() error { return run(ctx, "connect", c.runConnect, nil) })
eg.Go(func() error { return run(ctx, "zero-bootstrap", c.runBootstrap, nil) })
eg.Go(func() error { return run(ctx, "pomerium-core", c.runPomeriumCore, src.WaitReady) })
eg.Go(func() error { return run(ctx, "zero-reconciler", c.RunReconciler, src.WaitReady) })
eg.Go(func() error { return run(ctx, "zero-reconciler", c.runReconciler, src.WaitReady) })
eg.Go(func() error { return run(ctx, "connect-log", c.RunConnectLog, nil) })
return eg.Wait()
}
@ -87,6 +89,9 @@ func run(ctx context.Context, name string, runFn func(context.Context) error, wa
}
func (c *controller) runBootstrap(ctx context.Context) error {
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
return c.Str("service", "zero-bootstrap")
})
return c.bootstrapConfig.Run(ctx, c.api, c.cfg.bootstrapConfigFileName)
}
@ -95,5 +100,20 @@ func (c *controller) runPomeriumCore(ctx context.Context) error {
}
func (c *controller) runConnect(ctx context.Context) error {
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
return c.Str("service", "zero-connect")
})
return c.api.Connect(ctx)
}
func (c *controller) runReconciler(ctx context.Context) error {
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
return c.Str("service", "zero-reconciler")
})
return reconciler.Run(ctx,
reconciler.WithAPI(c.api),
reconciler.WithDataBrokerClient(c.GetDataBrokerServiceClient()),
)
}

View file

@ -1,14 +0,0 @@
package controller
import (
"context"
"github.com/pomerium/pomerium/internal/zero/reconciler"
)
func (c *controller) RunReconciler(ctx context.Context) error {
return reconciler.Run(ctx,
reconciler.WithAPI(c.api),
reconciler.WithDataBrokerClient(c.GetDataBrokerServiceClient()),
)
}

View file

@ -170,6 +170,7 @@ func (c *service) syncBundle(ctx context.Context, key string) error {
Strs("record_types", bundleRecordTypes).
Str("etag", current.ETag).
Str("last_modified", current.LastModified).
Interface("metadata", result.Metadata).
Msg("bundle synced")
err = c.SetBundleCacheEntry(ctx, key, current)