mirror of
https://github.com/pomerium/pomerium.git
synced 2025-06-05 04:13:11 +02:00
healthcheck: add common package, zero reporter and first xds check (#5059)
This commit is contained in:
parent
5af244f0e5
commit
991fca496c
9 changed files with 319 additions and 39 deletions
11
go.mod
11
go.mod
|
@ -64,11 +64,14 @@ require (
|
||||||
github.com/volatiletech/null/v9 v9.0.0
|
github.com/volatiletech/null/v9 v9.0.0
|
||||||
github.com/yuin/gopher-lua v1.1.1
|
github.com/yuin/gopher-lua v1.1.1
|
||||||
go.opencensus.io v0.24.0
|
go.opencensus.io v0.24.0
|
||||||
go.opentelemetry.io/otel v1.24.0
|
go.opentelemetry.io/otel v1.25.0
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0
|
||||||
go.opentelemetry.io/otel/metric v1.24.0
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.25.0
|
||||||
go.opentelemetry.io/otel/sdk v1.24.0
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0
|
||||||
|
go.opentelemetry.io/otel/metric v1.25.0
|
||||||
|
go.opentelemetry.io/otel/sdk v1.25.0
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.24.0
|
go.opentelemetry.io/otel/sdk/metric v1.24.0
|
||||||
|
go.opentelemetry.io/otel/trace v1.25.0
|
||||||
go.uber.org/automaxprocs v1.5.3
|
go.uber.org/automaxprocs v1.5.3
|
||||||
go.uber.org/zap v1.27.0
|
go.uber.org/zap v1.27.0
|
||||||
golang.org/x/crypto v0.21.0
|
golang.org/x/crypto v0.21.0
|
||||||
|
@ -204,9 +207,7 @@ require (
|
||||||
github.com/zeebo/blake3 v0.2.3 // indirect
|
github.com/zeebo/blake3 v0.2.3 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 // indirect
|
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 // indirect
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 // indirect
|
||||||
go.opentelemetry.io/otel/trace v1.24.0 // indirect
|
|
||||||
go.opentelemetry.io/proto/otlp v1.1.0 // indirect
|
go.opentelemetry.io/proto/otlp v1.1.0 // indirect
|
||||||
go.uber.org/multierr v1.11.0 // indirect
|
go.uber.org/multierr v1.11.0 // indirect
|
||||||
golang.org/x/mod v0.14.0 // indirect
|
golang.org/x/mod v0.14.0 // indirect
|
||||||
|
|
20
go.sum
20
go.sum
|
@ -722,24 +722,24 @@ go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.4
|
||||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0=
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 h1:jq9TW8u3so/bN+JPT166wjOI6/vQPF6Xe7nMNIltagk=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0/go.mod h1:p8pYQP+m5XfbZm9fxtSKAbM6oIllS7s2AfxrChvc7iw=
|
||||||
go.opentelemetry.io/otel v1.24.0 h1:0LAOdjNmQeSTzGBzduGe/rU4tZhMwL5rWgtp9Ku5Jfo=
|
go.opentelemetry.io/otel v1.25.0 h1:gldB5FfhRl7OJQbUHt/8s0a7cE8fbsPAtdpRaApKy4k=
|
||||||
go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
|
go.opentelemetry.io/otel v1.25.0/go.mod h1:Wa2ds5NOXEMkCmUou1WA7ZBfLTHWIsp034OVD7AO+Vg=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0 h1:f2jriWfOdldanBwS9jNBdeOKAQN7b4ugAMaNu1/1k9g=
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0 h1:f2jriWfOdldanBwS9jNBdeOKAQN7b4ugAMaNu1/1k9g=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0/go.mod h1:B+bcQI1yTY+N0vqMpoZbEN7+XU4tNM0DmUiOwebFJWI=
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.24.0/go.mod h1:B+bcQI1yTY+N0vqMpoZbEN7+XU4tNM0DmUiOwebFJWI=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 h1:9M3+rhx7kZCIQQhQRYaZCdNu1V73tm4TvXs2ntl98C4=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.25.0 h1:dT33yIHtmsqpixFsSQPwNeY5drM9wTcoL8h0FWF4oGM=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0/go.mod h1:noq80iT8rrHP1SfybmPiRGc9dc5M8RPmGvtwo7Oo7tc=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.25.0/go.mod h1:h95q0LBGh7hlAC08X2DhSeyIG02YQ0UyioTCVAqRPmc=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0 h1:tIqheXEFWAZ7O8A7m+J0aPTmpJN3YQ7qetUAdkkkKpk=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0 h1:tIqheXEFWAZ7O8A7m+J0aPTmpJN3YQ7qetUAdkkkKpk=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0/go.mod h1:nUeKExfxAQVbiVFn32YXpXZZHZ61Cc3s3Rn1pDBGAb0=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0/go.mod h1:nUeKExfxAQVbiVFn32YXpXZZHZ61Cc3s3Rn1pDBGAb0=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 h1:FyjCyI9jVEfqhUh2MoSkmolPjfh5fp2hnV0b0irxH4Q=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0 h1:FyjCyI9jVEfqhUh2MoSkmolPjfh5fp2hnV0b0irxH4Q=
|
||||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0/go.mod h1:hYwym2nDEeZfG/motx0p7L7J1N1vyzIThemQsb4g2qY=
|
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.22.0/go.mod h1:hYwym2nDEeZfG/motx0p7L7J1N1vyzIThemQsb4g2qY=
|
||||||
go.opentelemetry.io/otel/metric v1.24.0 h1:6EhoGWWK28x1fbpA4tYTOWBkPefTDQnb8WSGXlc88kI=
|
go.opentelemetry.io/otel/metric v1.25.0 h1:LUKbS7ArpFL/I2jJHdJcqMGxkRdxpPHE0VU/D4NuEwA=
|
||||||
go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=
|
go.opentelemetry.io/otel/metric v1.25.0/go.mod h1:rkDLUSd2lC5lq2dFNrX9LGAbINP5B7WBkC78RXCpH5s=
|
||||||
go.opentelemetry.io/otel/sdk v1.24.0 h1:YMPPDNymmQN3ZgczicBY3B6sf9n62Dlj9pWD3ucgoDw=
|
go.opentelemetry.io/otel/sdk v1.25.0 h1:PDryEJPC8YJZQSyLY5eqLeafHtG+X7FWnf3aXMtxbqo=
|
||||||
go.opentelemetry.io/otel/sdk v1.24.0/go.mod h1:KVrIYw6tEubO9E96HQpcmpTKDVn9gdv35HoYiQWGDFg=
|
go.opentelemetry.io/otel/sdk v1.25.0/go.mod h1:oFgzCM2zdsxKzz6zwpTZYLLQsFwc+K0daArPdIhuxkw=
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8=
|
go.opentelemetry.io/otel/sdk/metric v1.24.0 h1:yyMQrPzF+k88/DbH7o4FMAs80puqd+9osbiBrJrz/w8=
|
||||||
go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0=
|
go.opentelemetry.io/otel/sdk/metric v1.24.0/go.mod h1:I6Y5FjH6rvEnTTAYQz3Mmv2kl6Ek5IIrmwTLqMrrOE0=
|
||||||
go.opentelemetry.io/otel/trace v1.24.0 h1:CsKnnL4dUAr/0llH9FKuc698G04IrpWV0MQA/Y1YELI=
|
go.opentelemetry.io/otel/trace v1.25.0 h1:tqukZGLwQYRIFtSQM2u2+yfMVTgGVeqRLPUYx1Dq6RM=
|
||||||
go.opentelemetry.io/otel/trace v1.24.0/go.mod h1:HPc3Xr/cOApsBI154IU0OI0HJexz+aw5uPdbs3UCjNU=
|
go.opentelemetry.io/otel/trace v1.25.0/go.mod h1:hCCs70XM/ljO+BeQkyFnbK28SBIJ/Emuha+ccrCRT7I=
|
||||||
go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI=
|
go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI=
|
||||||
go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY=
|
go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY=
|
||||||
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
||||||
|
|
51
internal/controlplane/xdsmgr/log.go
Normal file
51
internal/controlplane/xdsmgr/log.go
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
package xdsmgr
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
envoy_config_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
|
||||||
|
envoy_config_listener_v3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
|
||||||
|
envoy_config_route_v3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
|
||||||
|
envoy_service_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
|
||||||
|
|
||||||
|
"github.com/pomerium/pomerium/internal/log"
|
||||||
|
"github.com/pomerium/pomerium/pkg/health"
|
||||||
|
"github.com/pomerium/pomerium/pkg/protoutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
clusterTypeURL = protoutil.GetTypeURL((*envoy_config_cluster_v3.Cluster)(nil))
|
||||||
|
listenerTypeURL = protoutil.GetTypeURL((*envoy_config_listener_v3.Listener)(nil))
|
||||||
|
routeConfigurationTypeURL = protoutil.GetTypeURL((*envoy_config_route_v3.RouteConfiguration)(nil))
|
||||||
|
)
|
||||||
|
|
||||||
|
func logNACK(req *envoy_service_discovery_v3.DeltaDiscoveryRequest) {
|
||||||
|
log.Debug(context.Background()).
|
||||||
|
Str("type-url", req.GetTypeUrl()).
|
||||||
|
Any("error-detail", req.GetErrorDetail()).
|
||||||
|
Msg("xdsmgr: nack")
|
||||||
|
|
||||||
|
health.ReportError(getHealthCheck(req.GetTypeUrl()), errors.New(req.GetErrorDetail().GetMessage()))
|
||||||
|
}
|
||||||
|
|
||||||
|
func logACK(req *envoy_service_discovery_v3.DeltaDiscoveryRequest) {
|
||||||
|
log.Debug(context.Background()).
|
||||||
|
Str("type-url", req.GetTypeUrl()).
|
||||||
|
Msg("xdsmgr: ack")
|
||||||
|
|
||||||
|
health.ReportOK(getHealthCheck(req.GetTypeUrl()))
|
||||||
|
}
|
||||||
|
|
||||||
|
func getHealthCheck(typeURL string) health.Check {
|
||||||
|
switch typeURL {
|
||||||
|
case clusterTypeURL:
|
||||||
|
return health.XDSCluster
|
||||||
|
case listenerTypeURL:
|
||||||
|
return health.XDSListener
|
||||||
|
case routeConfigurationTypeURL:
|
||||||
|
return health.XDSRouteConfiguration
|
||||||
|
default:
|
||||||
|
return health.XDSOther
|
||||||
|
}
|
||||||
|
}
|
|
@ -107,26 +107,21 @@ func (mgr *Manager) DeltaAggregatedResources(
|
||||||
case req.GetResponseNonce() == "":
|
case req.GetResponseNonce() == "":
|
||||||
// neither an ACK or a NACK
|
// neither an ACK or a NACK
|
||||||
case req.GetErrorDetail() != nil:
|
case req.GetErrorDetail() != nil:
|
||||||
log.Debug(ctx).
|
|
||||||
Str("type-url", req.GetTypeUrl()).
|
|
||||||
Any("error-detail", req.GetErrorDetail()).
|
|
||||||
Msg("xdsmgr: nack")
|
|
||||||
// a NACK
|
// a NACK
|
||||||
// - set the client resource versions to the current resource versions
|
// - set the client resource versions to the current resource versions
|
||||||
state.clientResourceVersions = make(map[string]string)
|
state.clientResourceVersions = make(map[string]string)
|
||||||
for _, resource := range mgr.resources[req.GetTypeUrl()] {
|
for _, resource := range mgr.resources[req.GetTypeUrl()] {
|
||||||
state.clientResourceVersions[resource.Name] = resource.Version
|
state.clientResourceVersions[resource.Name] = resource.Version
|
||||||
}
|
}
|
||||||
|
logNACK(req)
|
||||||
case req.GetResponseNonce() == mgr.nonce:
|
case req.GetResponseNonce() == mgr.nonce:
|
||||||
log.Debug(ctx).
|
|
||||||
Str("type-url", req.GetTypeUrl()).
|
|
||||||
Msg("xdsmgr: ack")
|
|
||||||
// an ACK for the last response
|
// an ACK for the last response
|
||||||
// - set the client resource versions to the current resource versions
|
// - set the client resource versions to the current resource versions
|
||||||
state.clientResourceVersions = make(map[string]string)
|
state.clientResourceVersions = make(map[string]string)
|
||||||
for _, resource := range mgr.resources[req.GetTypeUrl()] {
|
for _, resource := range mgr.resources[req.GetTypeUrl()] {
|
||||||
state.clientResourceVersions[resource.Name] = resource.Version
|
state.clientResourceVersions[resource.Name] = resource.Version
|
||||||
}
|
}
|
||||||
|
logACK(req)
|
||||||
default:
|
default:
|
||||||
// an ACK for a response that's not the last response
|
// an ACK for a response that's not the last response
|
||||||
log.Debug(ctx).
|
log.Debug(ctx).
|
||||||
|
|
|
@ -12,7 +12,8 @@ import (
|
||||||
"github.com/pomerium/pomerium/internal/zero/apierror"
|
"github.com/pomerium/pomerium/internal/zero/apierror"
|
||||||
connect_mux "github.com/pomerium/pomerium/internal/zero/connect-mux"
|
connect_mux "github.com/pomerium/pomerium/internal/zero/connect-mux"
|
||||||
"github.com/pomerium/pomerium/internal/zero/grpcconn"
|
"github.com/pomerium/pomerium/internal/zero/grpcconn"
|
||||||
"github.com/pomerium/pomerium/internal/zero/reporter"
|
"github.com/pomerium/pomerium/internal/zero/healthcheck"
|
||||||
|
metrics_reporter "github.com/pomerium/pomerium/internal/zero/reporter"
|
||||||
token_api "github.com/pomerium/pomerium/internal/zero/token"
|
token_api "github.com/pomerium/pomerium/internal/zero/token"
|
||||||
"github.com/pomerium/pomerium/pkg/fanout"
|
"github.com/pomerium/pomerium/pkg/fanout"
|
||||||
cluster_api "github.com/pomerium/pomerium/pkg/zero/cluster"
|
cluster_api "github.com/pomerium/pomerium/pkg/zero/cluster"
|
||||||
|
@ -23,6 +24,7 @@ import (
|
||||||
type API struct {
|
type API struct {
|
||||||
cfg *config
|
cfg *config
|
||||||
cluster cluster_api.ClientWithResponsesInterface
|
cluster cluster_api.ClientWithResponsesInterface
|
||||||
|
telemetryConn *grpc.ClientConn
|
||||||
mux *connect_mux.Mux
|
mux *connect_mux.Mux
|
||||||
downloadURLCache *cluster_api.URLCache
|
downloadURLCache *cluster_api.URLCache
|
||||||
tokenFn func(ctx context.Context, ttl time.Duration) (string, error)
|
tokenFn func(ctx context.Context, ttl time.Duration) (string, error)
|
||||||
|
@ -75,24 +77,31 @@ func NewAPI(ctx context.Context, opts ...Option) (*API, error) {
|
||||||
return nil, fmt.Errorf("error creating connect grpc client: %w", err)
|
return nil, fmt.Errorf("error creating connect grpc client: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
telemetryGRPCConn, err := grpcconn.New(ctx, cfg.otelEndpoint, func(ctx context.Context) (string, error) {
|
||||||
|
return tokenCache.GetToken(ctx, minTelemetryTokenTTL)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error creating OTEL exporter grpc client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
return &API{
|
return &API{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
cluster: clusterClient,
|
cluster: clusterClient,
|
||||||
mux: connect_mux.New(connect_api.NewConnectClient(connectGRPCConn)),
|
mux: connect_mux.New(connect_api.NewConnectClient(connectGRPCConn)),
|
||||||
|
telemetryConn: telemetryGRPCConn,
|
||||||
downloadURLCache: cluster_api.NewURLCache(),
|
downloadURLCache: cluster_api.NewURLCache(),
|
||||||
tokenFn: tokenCache.GetToken,
|
tokenFn: tokenCache.GetToken,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Report runs metrics reporting to the cloud
|
// ReportMetrics runs metrics reporting to the cloud
|
||||||
func (api *API) Report(ctx context.Context, opts ...reporter.Option) error {
|
func (api *API) ReportMetrics(ctx context.Context, opts ...metrics_reporter.Option) error {
|
||||||
conn, err := grpcconn.New(ctx, api.cfg.otelEndpoint, func(ctx context.Context) (string, error) {
|
return metrics_reporter.Run(ctx, api.telemetryConn, opts...)
|
||||||
return api.tokenFn(ctx, minTelemetryTokenTTL)
|
}
|
||||||
})
|
|
||||||
if err != nil {
|
// ReportHealthChecks runs health check reporting to the cloud
|
||||||
return fmt.Errorf("error creating OTEL exporter grpc client: %w", err)
|
func (api *API) ReportHealthChecks(ctx context.Context) error {
|
||||||
}
|
return healthcheck.NewReporter(api.telemetryConn).Run(ctx)
|
||||||
return reporter.Run(ctx, conn, opts...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect connects to the connect API and allows watching for changes
|
// Connect connects to the connect API and allows watching for changes
|
||||||
|
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/cenkalti/backoff/v4"
|
||||||
"github.com/rs/zerolog"
|
"github.com/rs/zerolog"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
|
|
||||||
|
@ -42,6 +43,7 @@ func Run(ctx context.Context, opts ...Option) error {
|
||||||
eg.Go(func() error { return run(ctx, "zero-bootstrap", c.runBootstrap) })
|
eg.Go(func() error { return run(ctx, "zero-bootstrap", c.runBootstrap) })
|
||||||
eg.Go(func() error { return run(ctx, "pomerium-core", c.runPomeriumCore) })
|
eg.Go(func() error { return run(ctx, "pomerium-core", c.runPomeriumCore) })
|
||||||
eg.Go(func() error { return run(ctx, "zero-control-loop", c.runZeroControlLoop) })
|
eg.Go(func() error { return run(ctx, "zero-control-loop", c.runZeroControlLoop) })
|
||||||
|
eg.Go(func() error { return run(ctx, "healh-check-reporter", c.runHealthCheckReporter) })
|
||||||
return eg.Wait()
|
return eg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,13 +105,13 @@ func (c *controller) runConnect(ctx context.Context) error {
|
||||||
|
|
||||||
func (c *controller) runZeroControlLoop(ctx context.Context) error {
|
func (c *controller) runZeroControlLoop(ctx context.Context) error {
|
||||||
return leaser.Run(ctx, c.bootstrapConfig,
|
return leaser.Run(ctx, c.bootstrapConfig,
|
||||||
c.runReconciler,
|
c.runReconcilerLeased,
|
||||||
c.runAnalytics,
|
c.runAnalyticsLeased,
|
||||||
c.runReporter,
|
c.runMetricsReporterLeased,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *controller) runReconciler(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
func (c *controller) runReconcilerLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||||
return c.Str("service", "zero-reconciler")
|
return c.Str("service", "zero-reconciler")
|
||||||
})
|
})
|
||||||
|
@ -120,7 +122,7 @@ func (c *controller) runReconciler(ctx context.Context, client databroker.DataBr
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *controller) runAnalytics(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
func (c *controller) runAnalyticsLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||||
return c.Str("service", "zero-analytics")
|
return c.Str("service", "zero-analytics")
|
||||||
})
|
})
|
||||||
|
@ -134,13 +136,31 @@ func (c *controller) runAnalytics(ctx context.Context, client databroker.DataBro
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *controller) runReporter(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
func (c *controller) runMetricsReporterLeased(ctx context.Context, client databroker.DataBrokerServiceClient) error {
|
||||||
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||||
return c.Str("service", "zero-reporter")
|
return c.Str("service", "zero-reporter")
|
||||||
})
|
})
|
||||||
|
|
||||||
return c.api.Report(ctx,
|
return c.api.ReportMetrics(ctx,
|
||||||
reporter.WithCollectInterval(time.Hour),
|
reporter.WithCollectInterval(time.Hour),
|
||||||
reporter.WithMetrics(analytics.Metrics(func() databroker.DataBrokerServiceClient { return client })...),
|
reporter.WithMetrics(analytics.Metrics(func() databroker.DataBrokerServiceClient { return client })...),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *controller) runHealthCheckReporter(ctx context.Context) error {
|
||||||
|
ctx = log.WithContext(ctx, func(c zerolog.Context) zerolog.Context {
|
||||||
|
return c.Str("service", "zero-health-check-reporter")
|
||||||
|
})
|
||||||
|
|
||||||
|
bo := backoff.NewExponentialBackOff()
|
||||||
|
bo.MaxElapsedTime = 0
|
||||||
|
return backoff.RetryNotify(
|
||||||
|
func() error {
|
||||||
|
return c.api.ReportHealthChecks(ctx)
|
||||||
|
},
|
||||||
|
backoff.WithContext(bo, ctx),
|
||||||
|
func(err error, next time.Duration) {
|
||||||
|
log.Ctx(ctx).Warn().Err(err).Dur("next", next).Msg("health check reporter backoff")
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
126
internal/zero/healthcheck/provider.go
Normal file
126
internal/zero/healthcheck/provider.go
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
package healthcheck
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
"go.opentelemetry.io/otel/codes"
|
||||||
|
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
|
||||||
|
export_grpc "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||||
|
"go.opentelemetry.io/otel/sdk/resource"
|
||||||
|
trace_sdk "go.opentelemetry.io/otel/sdk/trace"
|
||||||
|
semconv "go.opentelemetry.io/otel/semconv/v1.4.0"
|
||||||
|
"go.opentelemetry.io/otel/trace"
|
||||||
|
"google.golang.org/grpc"
|
||||||
|
|
||||||
|
"github.com/pomerium/pomerium/internal/log"
|
||||||
|
"github.com/pomerium/pomerium/internal/version"
|
||||||
|
"github.com/pomerium/pomerium/pkg/health"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Provider struct {
|
||||||
|
exporter *otlptrace.Exporter
|
||||||
|
tracer trace.Tracer
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ health.Provider = (*Provider)(nil)
|
||||||
|
|
||||||
|
const (
|
||||||
|
shutdownTimeout = 30 * time.Second
|
||||||
|
serviceName = "pomerium-managed-core"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewReporter creates a new unstarted health check reporter
|
||||||
|
func NewReporter(
|
||||||
|
conn *grpc.ClientConn,
|
||||||
|
) *Provider {
|
||||||
|
p := new(Provider)
|
||||||
|
p.init(conn)
|
||||||
|
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) Run(ctx context.Context) error {
|
||||||
|
health.SetProvider(p)
|
||||||
|
defer health.SetProvider(nil)
|
||||||
|
|
||||||
|
// we want the exporter
|
||||||
|
xc, cancel := context.WithCancel(context.WithoutCancel(ctx))
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
err := p.exporter.Start(xc)
|
||||||
|
if err != nil {
|
||||||
|
// this should not happen for the gRPC exporter as its non-blocking
|
||||||
|
return fmt.Errorf("error starting health check exporter: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
<-ctx.Done()
|
||||||
|
return p.shutdown(xc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) shutdown(ctx context.Context) error {
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, shutdownTimeout)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
return p.exporter.Shutdown(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) init(conn *grpc.ClientConn) {
|
||||||
|
p.initExporter(conn)
|
||||||
|
p.initTracer()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) initExporter(conn *grpc.ClientConn) {
|
||||||
|
p.exporter = export_grpc.NewUnstarted(export_grpc.WithGRPCConn(conn))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) initTracer() {
|
||||||
|
processor := trace_sdk.NewBatchSpanProcessor(p.exporter)
|
||||||
|
provider := trace_sdk.NewTracerProvider(
|
||||||
|
trace_sdk.WithResource(p.getResource()),
|
||||||
|
trace_sdk.WithSampler(trace_sdk.AlwaysSample()),
|
||||||
|
trace_sdk.WithSpanProcessor(processor),
|
||||||
|
)
|
||||||
|
p.tracer = provider.Tracer(serviceName)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) getResource() *resource.Resource {
|
||||||
|
attr := []attribute.KeyValue{
|
||||||
|
semconv.ServiceNameKey.String(serviceName),
|
||||||
|
semconv.ServiceVersionKey.String(version.FullVersion()),
|
||||||
|
}
|
||||||
|
|
||||||
|
hostname, err := os.Hostname()
|
||||||
|
if err == nil {
|
||||||
|
attr = append(attr, semconv.HostNameKey.String(hostname))
|
||||||
|
}
|
||||||
|
|
||||||
|
return resource.NewSchemaless(attr...)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) ReportOK(check health.Check, attr ...health.Attr) {
|
||||||
|
ctx := context.Background()
|
||||||
|
log.Ctx(ctx).Debug().Str("check", string(check)).Msg("health check ok")
|
||||||
|
_, span := p.tracer.Start(ctx, string(check))
|
||||||
|
span.SetStatus(codes.Ok, "")
|
||||||
|
setAttributes(span, attr...)
|
||||||
|
span.End()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Provider) ReportError(check health.Check, err error, attr ...health.Attr) {
|
||||||
|
ctx := context.Background()
|
||||||
|
log.Ctx(ctx).Warn().Str("check", string(check)).Err(err).Msg("health check error")
|
||||||
|
_, span := p.tracer.Start(ctx, string(check))
|
||||||
|
span.SetStatus(codes.Error, err.Error())
|
||||||
|
setAttributes(span, attr...)
|
||||||
|
span.End()
|
||||||
|
}
|
||||||
|
|
||||||
|
func setAttributes(span trace.Span, attr ...health.Attr) {
|
||||||
|
for _, a := range attr {
|
||||||
|
span.SetAttributes(attribute.String(a.Key, a.Value))
|
||||||
|
}
|
||||||
|
}
|
14
pkg/health/check.go
Normal file
14
pkg/health/check.go
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
package health
|
||||||
|
|
||||||
|
type Check string
|
||||||
|
|
||||||
|
const (
|
||||||
|
// XDSCluster checks whether the XDS Cluster resources were applied
|
||||||
|
XDSCluster = Check("xds.cluster")
|
||||||
|
// XDSListener checks whether the XDS Listener resources were applied
|
||||||
|
XDSListener = Check("xds.listener")
|
||||||
|
// XDSRouteConfiguration checks whether the XDS RouteConfiguration resources were applied
|
||||||
|
XDSRouteConfiguration = Check("xds.route-configuration")
|
||||||
|
// XDSOther is a catch-all for other XDS resources
|
||||||
|
XDSOther = Check("xds.other")
|
||||||
|
)
|
64
pkg/health/provider.go
Normal file
64
pkg/health/provider.go
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
package health
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Attr is a key-value pair that can be attached to a health check
|
||||||
|
type Attr struct {
|
||||||
|
Key string
|
||||||
|
Value string
|
||||||
|
}
|
||||||
|
|
||||||
|
// StrAttr creates a new string attribute
|
||||||
|
func StrAttr(key, value string) Attr {
|
||||||
|
return Attr{Key: key, Value: value}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReportOK reports that a check was successful
|
||||||
|
func ReportOK(check Check, attributes ...Attr) {
|
||||||
|
p := defaultProvider.Load()
|
||||||
|
if p != nil {
|
||||||
|
p.ReportOK(check, attributes...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReportError reports that a check failed
|
||||||
|
func ReportError(check Check, err error, attributes ...Attr) {
|
||||||
|
p := defaultProvider.Load()
|
||||||
|
if p != nil {
|
||||||
|
p.ReportError(check, err, attributes...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Provider is the interface that must be implemented by a health check reporter
|
||||||
|
type Provider interface {
|
||||||
|
ReportOK(check Check, attributes ...Attr)
|
||||||
|
ReportError(check Check, err error, attributes ...Attr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetProvider sets the health check provider
|
||||||
|
func SetProvider(p Provider) {
|
||||||
|
defaultProvider.Store(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
type providerStore struct {
|
||||||
|
sync.RWMutex
|
||||||
|
provider Provider
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *providerStore) Load() Provider {
|
||||||
|
p.RLock()
|
||||||
|
defer p.RUnlock()
|
||||||
|
|
||||||
|
return p.provider
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *providerStore) Store(provider Provider) {
|
||||||
|
p.Lock()
|
||||||
|
defer p.Unlock()
|
||||||
|
|
||||||
|
p.provider = provider
|
||||||
|
}
|
||||||
|
|
||||||
|
var defaultProvider providerStore
|
Loading…
Add table
Add a link
Reference in a new issue