From 55cce29741df743f5812a6ff5585390c0f1ed43a Mon Sep 17 00:00:00 2001 From: Denis Mishin Date: Tue, 5 Dec 2023 10:48:01 -0500 Subject: [PATCH] zero: add more verbose logging about background control loops --- internal/retry/config.go | 1 + internal/retry/retry.go | 36 ++++++++++++++++++++++++-- internal/zero/controller/controller.go | 22 +++++++++++++++- internal/zero/controller/reconciler.go | 14 ---------- internal/zero/reconciler/sync.go | 1 + 5 files changed, 57 insertions(+), 17 deletions(-) delete mode 100644 internal/zero/controller/reconciler.go diff --git a/internal/retry/config.go b/internal/retry/config.go index a5929c77f..f55e6282e 100644 --- a/internal/retry/config.go +++ b/internal/retry/config.go @@ -61,6 +61,7 @@ func newConfig(opts ...Option) ([]watch, backoff.BackOff) { bo := backoff.NewExponentialBackOff() bo.MaxInterval = cfg.maxInterval bo.MaxElapsedTime = 0 + bo.Multiplier = 2 return cfg.watches, bo } diff --git a/internal/retry/retry.go b/internal/retry/retry.go index 02042e349..f71bb3ca4 100644 --- a/internal/retry/retry.go +++ b/internal/retry/retry.go @@ -7,6 +7,10 @@ import ( "fmt" "reflect" "time" + + "github.com/rs/zerolog" + + "github.com/pomerium/pomerium/internal/log" ) // Retry retries a function (with exponential back-off) until it succeeds. @@ -20,6 +24,10 @@ func Retry( fn func(context.Context) error, opts ...Option, ) error { + ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context { + return c.Str("control-loop", name) + }) + watches, backoff := newConfig(opts...) ticker := time.NewTicker(backoff.NextBackOff()) defer ticker.Stop() @@ -32,16 +40,22 @@ restart: if err == nil { return nil } + if IsTerminalError(err) { + log.Ctx(ctx).Error().Err(err).Msg("terminal error") return err } + log.Ctx(ctx).Warn().Msg(err.Error()) backoff.Reset() backoff: for { - ticker.Reset(backoff.NextBackOff()) + interval := backoff.NextBackOff() + ticker.Reset(interval) + log.Ctx(ctx).Warn().Msgf("backing off for %s...", interval.String()) next, err := s.Exec(ctx) + logNext(ctx, next, err) switch next { case nextRestart: continue restart @@ -56,6 +70,24 @@ restart: } } +func logNext(ctx context.Context, next next, err error) { + evt := log.Ctx(ctx).Info() + if err != nil { + evt = log.Ctx(ctx).Warn().Err(err) + } + + switch next { + case nextRestart: + evt.Msg("retrying...") + case nextBackoff: + evt.Msg("will retry after backoff") + case nextExit: + evt.Msg("exiting") + default: + evt.Msg("unknown next state") + } +} + type selectCase struct { watches []watch cases []reflect.SelectCase @@ -132,7 +164,7 @@ func onError(w watch, err error) (next, error) { } if w.this { - return nextBackoff, fmt.Errorf("retry %s failed: %w", w.name, err) + return nextBackoff, err } panic("unreachable, as watches are wrapped in retries and may only return terminal errors") diff --git a/internal/zero/controller/controller.go b/internal/zero/controller/controller.go index d1377168e..8e6501f62 100644 --- a/internal/zero/controller/controller.go +++ b/internal/zero/controller/controller.go @@ -6,10 +6,12 @@ import ( "errors" "fmt" + "github.com/rs/zerolog" "golang.org/x/sync/errgroup" "github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/internal/zero/bootstrap" + "github.com/pomerium/pomerium/internal/zero/reconciler" "github.com/pomerium/pomerium/pkg/cmd/pomerium" "github.com/pomerium/pomerium/pkg/grpc/databroker" sdk "github.com/pomerium/zero-sdk" @@ -39,7 +41,7 @@ func Run(ctx context.Context, opts ...Option) error { eg.Go(func() error { return run(ctx, "connect", c.runConnect, nil) }) eg.Go(func() error { return run(ctx, "zero-bootstrap", c.runBootstrap, nil) }) eg.Go(func() error { return run(ctx, "pomerium-core", c.runPomeriumCore, src.WaitReady) }) - eg.Go(func() error { return run(ctx, "zero-reconciler", c.RunReconciler, src.WaitReady) }) + eg.Go(func() error { return run(ctx, "zero-reconciler", c.runReconciler, src.WaitReady) }) eg.Go(func() error { return run(ctx, "connect-log", c.RunConnectLog, nil) }) return eg.Wait() } @@ -87,6 +89,9 @@ func run(ctx context.Context, name string, runFn func(context.Context) error, wa } func (c *controller) runBootstrap(ctx context.Context) error { + ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context { + return c.Str("service", "zero-bootstrap") + }) return c.bootstrapConfig.Run(ctx, c.api, c.cfg.bootstrapConfigFileName) } @@ -95,5 +100,20 @@ func (c *controller) runPomeriumCore(ctx context.Context) error { } func (c *controller) runConnect(ctx context.Context) error { + ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context { + return c.Str("service", "zero-connect") + }) + return c.api.Connect(ctx) } + +func (c *controller) runReconciler(ctx context.Context) error { + ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context { + return c.Str("service", "zero-reconciler") + }) + + return reconciler.Run(ctx, + reconciler.WithAPI(c.api), + reconciler.WithDataBrokerClient(c.GetDataBrokerServiceClient()), + ) +} diff --git a/internal/zero/controller/reconciler.go b/internal/zero/controller/reconciler.go deleted file mode 100644 index f8ecd4aac..000000000 --- a/internal/zero/controller/reconciler.go +++ /dev/null @@ -1,14 +0,0 @@ -package controller - -import ( - "context" - - "github.com/pomerium/pomerium/internal/zero/reconciler" -) - -func (c *controller) RunReconciler(ctx context.Context) error { - return reconciler.Run(ctx, - reconciler.WithAPI(c.api), - reconciler.WithDataBrokerClient(c.GetDataBrokerServiceClient()), - ) -} diff --git a/internal/zero/reconciler/sync.go b/internal/zero/reconciler/sync.go index abe5fd2cd..234e41637 100644 --- a/internal/zero/reconciler/sync.go +++ b/internal/zero/reconciler/sync.go @@ -170,6 +170,7 @@ func (c *service) syncBundle(ctx context.Context, key string) error { Strs("record_types", bundleRecordTypes). Str("etag", current.ETag). Str("last_modified", current.LastModified). + Interface("metadata", result.Metadata). Msg("bundle synced") err = c.SetBundleCacheEntry(ctx, key, current)