mirror of
https://github.com/pomerium/pomerium.git
synced 2025-05-10 07:37:33 +02:00
zero: refactor telemetry and controller (#5135)
* zero: refactor controller * refactor zero telemetry and controller * wire with connect handler * cr
This commit is contained in:
parent
cc636be707
commit
114f730dba
22 changed files with 612 additions and 342 deletions
|
@ -5,22 +5,20 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/cenkalti/backoff/v4"
|
||||
"github.com/rs/zerolog"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/pomerium/pomerium/internal/log"
|
||||
"github.com/pomerium/pomerium/internal/zero/analytics"
|
||||
sdk "github.com/pomerium/pomerium/internal/zero/api"
|
||||
"github.com/pomerium/pomerium/internal/zero/bootstrap"
|
||||
"github.com/pomerium/pomerium/internal/zero/bootstrap/writers"
|
||||
"github.com/pomerium/pomerium/internal/zero/healthcheck"
|
||||
connect_mux "github.com/pomerium/pomerium/internal/zero/connect-mux"
|
||||
"github.com/pomerium/pomerium/internal/zero/reconciler"
|
||||
"github.com/pomerium/pomerium/internal/zero/reporter"
|
||||
"github.com/pomerium/pomerium/internal/zero/telemetry/reporter"
|
||||
"github.com/pomerium/pomerium/pkg/cmd/pomerium"
|
||||
"github.com/pomerium/pomerium/pkg/grpc/databroker"
|
||||
"github.com/pomerium/pomerium/pkg/zero/connect"
|
||||
)
|
||||
|
||||
// Run runs Pomerium is managed mode using the provided token.
|
||||
|
@ -61,7 +59,6 @@ func Run(ctx context.Context, opts ...Option) error {
|
|||
eg.Go(func() error { return run(ctx, "zero-bootstrap", c.runBootstrap) })
|
||||
eg.Go(func() error { return run(ctx, "pomerium-core", c.runPomeriumCore) })
|
||||
eg.Go(func() error { return run(ctx, "zero-control-loop", c.runZeroControlLoop) })
|
||||
eg.Go(func() error { return run(ctx, "healh-check-reporter", c.runHealthCheckReporter) })
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
|
@ -70,7 +67,8 @@ type controller struct {
|
|||
|
||||
api *sdk.API
|
||||
|
||||
bootstrapConfig *bootstrap.Source
|
||||
bootstrapConfig *bootstrap.Source
|
||||
telemetryReporter *reporter.Reporter
|
||||
}
|
||||
|
||||
func (c *controller) initAPI(ctx context.Context) error {
|
||||
|
@ -85,7 +83,6 @@ func (c *controller) initAPI(ctx context.Context) error {
|
|||
}
|
||||
|
||||
c.api = api
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -134,14 +131,35 @@ func (c *controller) runZeroControlLoop(ctx context.Context) error {
|
|||
r := c.NewDatabrokerRestartRunner(ctx)
|
||||
defer r.Close()
|
||||
|
||||
return r.Run(ctx,
|
||||
WithLease(
|
||||
c.runReconcilerLeased,
|
||||
c.runAnalyticsLeased,
|
||||
c.runMetricsReporterLeased,
|
||||
c.runHealthChecksLeased,
|
||||
),
|
||||
)
|
||||
err = c.initTelemetry(ctx, func() (databroker.DataBrokerServiceClient, error) {
|
||||
client, _, err := r.getDatabrokerClient()
|
||||
return client, err
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("init telemetry: %w", err)
|
||||
}
|
||||
defer c.shutdownTelemetry(ctx)
|
||||
|
||||
err = c.api.Watch(ctx, connect_mux.WithOnTelemetryRequested(func(ctx context.Context, _ *connect.TelemetryRequest) {
|
||||
c.telemetryReporter.CollectAndExportMetrics(ctx)
|
||||
}))
|
||||
if err != nil {
|
||||
return fmt.Errorf("watch telemetry: %w", err)
|
||||
}
|
||||
|
||||
eg, ctx := errgroup.WithContext(ctx)
|
||||
eg.Go(func() error {
|
||||
return r.Run(ctx,
|
||||
WithLease(
|
||||
c.runReconcilerLeased,
|
||||
c.runSessionAnalyticsLeased,
|
||||
c.enableSessionAnalyticsReporting,
|
||||
c.runHealthChecksLeased,
|
||||
),
|
||||
)
|
||||
})
|
||||
eg.Go(func() error { return c.runTelemetryReporter(ctx) })
|
||||
return eg.Wait()
|
||||
}
|
||||
|
||||
func (c *controller) runReconcilerLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||
|
@ -154,54 +172,3 @@ func (c *controller) runReconcilerLeased(ctx context.Context, client databroker.
|
|||
reconciler.WithDataBrokerClient(client),
|
||||
)
|
||||
}
|
||||
|
||||
func (c *controller) runAnalyticsLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||
return c.Str("service", "zero-analytics")
|
||||
})
|
||||
|
||||
err := analytics.Collect(ctx, client, time.Hour)
|
||||
if err != nil && ctx.Err() == nil {
|
||||
log.Ctx(ctx).Error().Err(err).Msg("error collecting analytics, disabling")
|
||||
return nil
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *controller) runMetricsReporterLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||
return c.Str("service", "zero-reporter")
|
||||
})
|
||||
|
||||
return c.api.ReportMetrics(ctx,
|
||||
reporter.WithCollectInterval(time.Hour),
|
||||
reporter.WithMetrics(analytics.Metrics(func() databroker.DataBrokerServiceClient { return client })...),
|
||||
)
|
||||
}
|
||||
|
||||
func (c *controller) runHealthChecksLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||
return c.Str("service", "zero-health-checks")
|
||||
})
|
||||
|
||||
return healthcheck.RunChecks(ctx, c.bootstrapConfig, client)
|
||||
}
|
||||
|
||||
func (c *controller) runHealthCheckReporter(ctx context.Context) error {
|
||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||
return c.Str("service", "zero-health-check-reporter")
|
||||
})
|
||||
|
||||
bo := backoff.NewExponentialBackOff()
|
||||
bo.MaxElapsedTime = 0
|
||||
return backoff.RetryNotify(
|
||||
func() error {
|
||||
return c.api.ReportHealthChecks(ctx)
|
||||
},
|
||||
backoff.WithContext(bo, ctx),
|
||||
func(err error, next time.Duration) {
|
||||
log.Ctx(ctx).Warn().Err(err).Dur("next", next).Msg("health check reporter backoff")
|
||||
},
|
||||
)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue