mirror of
https://github.com/pomerium/pomerium.git
synced 2025-08-04 01:09:36 +02:00
Refactor trace config to match supported otel options (#5447)
* Refactor trace config to match supported otel options * use duration instead of int64 for otel timeouts * change 'trace client updated' log level to debug
This commit is contained in:
parent
3e90f1e244
commit
5e94b2f8f1
22 changed files with 1067 additions and 721 deletions
|
@ -8,8 +8,9 @@ import (
|
|||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"github.com/pomerium/pomerium/config/otelconfig"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
|
||||
|
@ -23,12 +24,19 @@ var (
|
|||
ErrClientStopped = errors.New("client is stopped")
|
||||
)
|
||||
|
||||
// SyncClient wraps an underlying [otlptrace.Client] which can be swapped out
|
||||
// for a different client (e.g. in response to a config update) safely and in
|
||||
// a way that does not lose spans.
|
||||
type SyncClient interface {
|
||||
otlptrace.Client
|
||||
|
||||
Update(ctx context.Context, newClient otlptrace.Client) error
|
||||
}
|
||||
|
||||
// NewSyncClient creates a new [SyncClient] with an initial underlying client.
|
||||
//
|
||||
// The client can be nil; if so, calling any method on the SyncClient will
|
||||
// return ErrNoClient.
|
||||
func NewSyncClient(client otlptrace.Client) SyncClient {
|
||||
return &syncClient{
|
||||
client: client,
|
||||
|
@ -63,17 +71,24 @@ func (ac *syncClient) Stop(ctx context.Context) error {
|
|||
if ac.waitForNewClient != nil {
|
||||
panic("bug: Stop called concurrently")
|
||||
}
|
||||
if ac.client == nil {
|
||||
return ErrNoClient
|
||||
}
|
||||
return ac.resetLocked(ctx, nil)
|
||||
}
|
||||
|
||||
func (ac *syncClient) resetLocked(ctx context.Context, newClient otlptrace.Client) error {
|
||||
if ac.client == nil {
|
||||
return ErrNoClient
|
||||
var stop func(context.Context) error
|
||||
if ac.client != nil {
|
||||
stop = ac.client.Stop
|
||||
}
|
||||
ac.waitForNewClient = make(chan struct{})
|
||||
ac.mu.Unlock()
|
||||
|
||||
err := ac.client.Stop(ctx)
|
||||
var err error
|
||||
if stop != nil {
|
||||
err = stop(ctx)
|
||||
}
|
||||
|
||||
ac.mu.Lock()
|
||||
close(ac.waitForNewClient)
|
||||
|
@ -123,53 +138,70 @@ func (ac *syncClient) Update(ctx context.Context, newClient otlptrace.Client) er
|
|||
return ac.resetLocked(ctx, newClient)
|
||||
}
|
||||
|
||||
// NewRemoteClientFromEnv creates an otlp trace client using the well-known
|
||||
// environment variables defined in the [OpenTelemetry documentation].
|
||||
//
|
||||
// [OpenTelemetry documentation]: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/
|
||||
func NewRemoteClientFromEnv() otlptrace.Client {
|
||||
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
|
||||
return NoopClient{}
|
||||
func NewTraceClientFromConfig(opts otelconfig.Config) (otlptrace.Client, error) {
|
||||
if IsOtelSDKDisabled() {
|
||||
return NoopClient{}, nil
|
||||
}
|
||||
|
||||
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
|
||||
if !ok {
|
||||
exporter = "none"
|
||||
if opts.OtelTracesExporter == nil {
|
||||
return NoopClient{}, nil
|
||||
}
|
||||
|
||||
switch strings.ToLower(strings.TrimSpace(exporter)) {
|
||||
case "none", "noop", "":
|
||||
return NoopClient{}
|
||||
switch *opts.OtelTracesExporter {
|
||||
case "otlp":
|
||||
var protocol string
|
||||
if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"); ok {
|
||||
protocol = v
|
||||
} else if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_PROTOCOL"); ok {
|
||||
protocol = v
|
||||
var endpoint, protocol string
|
||||
var signalSpecificEndpoint bool
|
||||
|
||||
if opts.OtelExporterOtlpTracesEndpoint != nil {
|
||||
endpoint = *opts.OtelExporterOtlpTracesEndpoint
|
||||
signalSpecificEndpoint = true
|
||||
} else if opts.OtelExporterOtlpEndpoint != nil {
|
||||
endpoint = *opts.OtelExporterOtlpEndpoint
|
||||
signalSpecificEndpoint = false
|
||||
}
|
||||
if opts.OtelExporterOtlpTracesProtocol != nil {
|
||||
protocol = *opts.OtelExporterOtlpTracesProtocol
|
||||
} else if opts.OtelExporterOtlpProtocol != nil {
|
||||
protocol = *opts.OtelExporterOtlpProtocol
|
||||
} else {
|
||||
// try to guess the expected protocol from the port number
|
||||
var endpoint string
|
||||
var specific bool
|
||||
if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"); ok {
|
||||
endpoint = v
|
||||
specific = true
|
||||
} else if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_ENDPOINT"); ok {
|
||||
endpoint = v
|
||||
protocol = BestEffortProtocolFromOTLPEndpoint(endpoint, signalSpecificEndpoint)
|
||||
}
|
||||
|
||||
var headersList []string
|
||||
if len(opts.OtelExporterOtlpTracesHeaders) > 0 {
|
||||
headersList = opts.OtelExporterOtlpTracesHeaders
|
||||
} else if len(opts.OtelExporterOtlpHeaders) > 0 {
|
||||
headersList = opts.OtelExporterOtlpHeaders
|
||||
}
|
||||
headers := map[string]string{}
|
||||
for _, kv := range headersList {
|
||||
k, v, ok := strings.Cut(kv, "=")
|
||||
if ok {
|
||||
headers[k] = v
|
||||
}
|
||||
protocol = BestEffortProtocolFromOTLPEndpoint(endpoint, specific)
|
||||
}
|
||||
defaultTimeout := 10 * time.Second // otel default (not exported)
|
||||
if opts.OtelExporterOtlpTimeout != nil {
|
||||
defaultTimeout = max(0, time.Duration(*opts.OtelExporterOtlpTimeout)*time.Millisecond)
|
||||
}
|
||||
switch strings.ToLower(strings.TrimSpace(protocol)) {
|
||||
case "grpc":
|
||||
return otlptracegrpc.NewClient()
|
||||
return otlptracegrpc.NewClient(
|
||||
otlptracegrpc.WithEndpointURL(endpoint),
|
||||
otlptracegrpc.WithHeaders(headers),
|
||||
otlptracegrpc.WithTimeout(defaultTimeout),
|
||||
), nil
|
||||
case "http/protobuf", "":
|
||||
return otlptracehttp.NewClient()
|
||||
return otlptracehttp.NewClient(
|
||||
otlptracehttp.WithEndpointURL(endpoint),
|
||||
otlptracehttp.WithHeaders(headers),
|
||||
otlptracehttp.WithTimeout(defaultTimeout),
|
||||
), nil
|
||||
default:
|
||||
otel.Handle(fmt.Errorf(`unknown otlp trace exporter protocol %q, expected "grpc" or "http/protobuf"`, protocol))
|
||||
return NoopClient{}
|
||||
return nil, fmt.Errorf(`unknown otlp trace exporter protocol %q, expected one of ["grpc", "http/protobuf"]`, protocol)
|
||||
}
|
||||
case "none", "noop", "":
|
||||
return NoopClient{}, nil
|
||||
default:
|
||||
otel.Handle(fmt.Errorf(`unknown otlp trace exporter %q, expected "otlp" or "none"`, exporter))
|
||||
return NoopClient{}
|
||||
return nil, fmt.Errorf(`unknown otlp trace exporter %q, expected one of ["otlp", "none"]`, *opts.OtelTracesExporter)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -258,34 +290,6 @@ func (n ValidNoopSpan) SpanContext() oteltrace.SpanContext {
|
|||
|
||||
var _ oteltrace.Span = ValidNoopSpan{}
|
||||
|
||||
func IsDisabledViaEnvironment() bool {
|
||||
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
|
||||
return true
|
||||
}
|
||||
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
switch strings.ToLower(strings.TrimSpace(exporter)) {
|
||||
case "none, noop":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func IsEnabledViaEnvironment() bool {
|
||||
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
|
||||
return false
|
||||
}
|
||||
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
switch strings.ToLower(strings.TrimSpace(exporter)) {
|
||||
case "none, noop", "":
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
func IsOtelSDKDisabled() bool {
|
||||
return os.Getenv("OTEL_SDK_DISABLED") == "true"
|
||||
}
|
||||
|
|
|
@ -3,12 +3,15 @@ package trace_test
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/pomerium/pomerium/config"
|
||||
"github.com/pomerium/pomerium/internal/log"
|
||||
"github.com/pomerium/pomerium/internal/telemetry/trace"
|
||||
"github.com/pomerium/pomerium/internal/testenv"
|
||||
|
@ -16,6 +19,7 @@ import (
|
|||
"github.com/pomerium/pomerium/internal/testenv/snippets"
|
||||
. "github.com/pomerium/pomerium/internal/testutil/tracetest" //nolint:revive
|
||||
"github.com/pomerium/pomerium/internal/testutil/tracetest/mock_otlptrace"
|
||||
"github.com/pomerium/pomerium/internal/version"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.opentelemetry.io/otel"
|
||||
|
@ -86,6 +90,49 @@ func TestSyncClient(t *testing.T) {
|
|||
assert.NoError(t, sc.Stop(context.Background()))
|
||||
})
|
||||
|
||||
t.Run("Update from nil client to non-nil client", func(t *testing.T) {
|
||||
ctrl := gomock.NewController(t)
|
||||
|
||||
sc := trace.NewSyncClient(nil)
|
||||
|
||||
mockClient := mock_otlptrace.NewMockClient(ctrl)
|
||||
start := mockClient.EXPECT().
|
||||
Start(gomock.Any()).
|
||||
Return(nil)
|
||||
upload := mockClient.EXPECT().
|
||||
UploadTraces(gomock.Any(), gomock.Any()).
|
||||
Return(nil).
|
||||
After(start)
|
||||
mockClient.EXPECT().
|
||||
Stop(gomock.Any()).
|
||||
Return(nil).
|
||||
After(upload)
|
||||
assert.NoError(t, sc.Update(context.Background(), mockClient))
|
||||
assert.NoError(t, sc.UploadTraces(context.Background(), []*tracev1.ResourceSpans{}))
|
||||
assert.NoError(t, sc.Stop(context.Background()))
|
||||
})
|
||||
|
||||
t.Run("Update from non-nil client to nil client", func(t *testing.T) {
|
||||
ctrl := gomock.NewController(t)
|
||||
|
||||
sc := trace.NewSyncClient(nil)
|
||||
|
||||
{
|
||||
mockClient := mock_otlptrace.NewMockClient(ctrl)
|
||||
start := mockClient.EXPECT().
|
||||
Start(gomock.Any()).
|
||||
Return(nil)
|
||||
mockClient.EXPECT().
|
||||
Stop(gomock.Any()).
|
||||
Return(nil).
|
||||
After(start)
|
||||
assert.NoError(t, sc.Update(context.Background(), mockClient))
|
||||
}
|
||||
|
||||
sc.Update(context.Background(), nil)
|
||||
assert.ErrorIs(t, sc.UploadTraces(context.Background(), []*tracev1.ResourceSpans{}), trace.ErrNoClient)
|
||||
})
|
||||
|
||||
spinWait := func(counter *atomic.Int32, until int32) error {
|
||||
startTime := time.Now()
|
||||
for counter.Load() != until {
|
||||
|
@ -257,6 +304,9 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
grpcEndpoint := receiver.GRPCEndpointURL()
|
||||
httpEndpoint := receiver.HTTPEndpointURL()
|
||||
|
||||
emptyConfigFilePath := filepath.Join(env.TempDir(), "empty_config.yaml")
|
||||
require.NoError(t, os.WriteFile(emptyConfigFilePath, []byte("{}"), 0o644))
|
||||
|
||||
env.Start()
|
||||
snippets.WaitStartupComplete(env)
|
||||
|
||||
|
@ -266,6 +316,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
newClientErr string
|
||||
uploadErr bool
|
||||
expectNoSpans bool
|
||||
expectHeaders map[string][]string
|
||||
}{
|
||||
{
|
||||
name: "GRPC endpoint, auto protocol",
|
||||
|
@ -344,7 +395,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
env: map[string]string{
|
||||
"OTEL_TRACES_EXPORTER": "invalid",
|
||||
},
|
||||
newClientErr: `unknown otlp trace exporter "invalid", expected "otlp" or "none"`,
|
||||
newClientErr: `unknown otlp trace exporter "invalid", expected one of ["otlp", "none"]`,
|
||||
},
|
||||
{
|
||||
name: "invalid protocol",
|
||||
|
@ -353,7 +404,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": grpcEndpoint.Value(),
|
||||
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "invalid",
|
||||
},
|
||||
newClientErr: `unknown otlp trace exporter protocol "invalid", expected "grpc" or "http/protobuf"`,
|
||||
newClientErr: `unknown otlp trace exporter protocol "invalid", expected one of ["grpc", "http/protobuf"]`,
|
||||
},
|
||||
{
|
||||
name: "valid configuration, but sdk disabled",
|
||||
|
@ -392,25 +443,62 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
"OTEL_EXPORTER_OTLP_ENDPOINT": grpcEndpoint.Value(),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "valid exporter, trace headers",
|
||||
env: map[string]string{
|
||||
"OTEL_TRACES_EXPORTER": "otlp",
|
||||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
|
||||
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
|
||||
"OTEL_EXPORTER_OTLP_TRACES_HEADERS": "foo=bar,bar=baz",
|
||||
},
|
||||
expectHeaders: map[string][]string{
|
||||
"foo": {"bar"},
|
||||
"bar": {"baz"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "valid exporter, alt headers",
|
||||
env: map[string]string{
|
||||
"OTEL_TRACES_EXPORTER": "otlp",
|
||||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
|
||||
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
|
||||
"OTEL_EXPORTER_OTLP_HEADERS": "foo=bar,bar=baz",
|
||||
},
|
||||
expectHeaders: map[string][]string{
|
||||
"foo": {"bar"},
|
||||
"bar": {"baz"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "headers variable precedence",
|
||||
env: map[string]string{
|
||||
"OTEL_TRACES_EXPORTER": "otlp",
|
||||
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
|
||||
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
|
||||
"OTEL_EXPORTER_OTLP_HEADERS": "a=1,b=2,c=3",
|
||||
"OTEL_EXPORTER_OTLP_TRACES_HEADERS": "a=2,d=4",
|
||||
},
|
||||
expectHeaders: map[string][]string{
|
||||
"a": {"2"},
|
||||
"d": {"4"},
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
for k, v := range tc.env {
|
||||
t.Setenv(k, v)
|
||||
}
|
||||
handler := &errHandler{}
|
||||
oldErrHandler := otel.GetErrorHandler()
|
||||
otel.SetErrorHandler(handler)
|
||||
t.Cleanup(func() { otel.SetErrorHandler(oldErrHandler) })
|
||||
|
||||
remoteClient := trace.NewRemoteClientFromEnv()
|
||||
ctx := trace.Options{
|
||||
RemoteClient: remoteClient,
|
||||
}.NewContext(log.Ctx(env.Context()).WithContext(context.Background()))
|
||||
cfg, err := config.NewFileOrEnvironmentSource(context.Background(), emptyConfigFilePath, version.FullVersion())
|
||||
require.NoError(t, err)
|
||||
|
||||
remoteClient, err := trace.NewTraceClientFromConfig(cfg.GetConfig().Options.Tracing)
|
||||
if tc.newClientErr != "" {
|
||||
assert.ErrorContains(t, handler.err, tc.newClientErr)
|
||||
assert.ErrorContains(t, err, tc.newClientErr)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
|
||||
ctx := trace.NewContext(log.Ctx(env.Context()).WithContext(context.Background()), remoteClient)
|
||||
|
||||
tp := trace.NewTracerProvider(ctx, t.Name())
|
||||
|
||||
|
@ -424,6 +512,11 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
|
|||
}
|
||||
assert.NoError(t, trace.ShutdownContext(ctx))
|
||||
|
||||
if tc.expectHeaders != nil {
|
||||
for _, req := range receiver.ReceivedRequests() {
|
||||
assert.Subset(t, req.Metadata, tc.expectHeaders, "missing expected headers")
|
||||
}
|
||||
}
|
||||
results := NewTraceResults(receiver.FlushResourceSpans())
|
||||
if tc.expectNoSpans {
|
||||
results.MatchTraces(t, MatchOptions{Exact: true})
|
||||
|
|
|
@ -3,6 +3,7 @@ package trace
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
|
@ -41,7 +42,7 @@ type ExporterServer struct {
|
|||
func NewServer(ctx context.Context) *ExporterServer {
|
||||
sys := systemContextFromContext(ctx)
|
||||
ex := &ExporterServer{
|
||||
remoteClient: sys.options.RemoteClient,
|
||||
remoteClient: sys.remoteClient,
|
||||
observer: sys.observer,
|
||||
server: grpc.NewServer(grpc.Creds(insecure.NewCredentials())),
|
||||
}
|
||||
|
@ -53,7 +54,9 @@ func (srv *ExporterServer) Start(ctx context.Context) {
|
|||
lis := bufconn.Listen(2 * 1024 * 1024)
|
||||
go func() {
|
||||
if err := srv.remoteClient.Start(ctx); err != nil {
|
||||
panic(err)
|
||||
if !errors.Is(err, ErrNoClient) {
|
||||
panic(fmt.Errorf("bug: %w", err))
|
||||
}
|
||||
}
|
||||
_ = srv.server.Serve(lis)
|
||||
}()
|
||||
|
|
|
@ -19,20 +19,20 @@ import (
|
|||
)
|
||||
|
||||
type Options struct {
|
||||
DebugFlags DebugFlags
|
||||
RemoteClient otlptrace.Client
|
||||
DebugFlags DebugFlags
|
||||
}
|
||||
|
||||
func (op Options) NewContext(parent context.Context) context.Context {
|
||||
func (op Options) NewContext(parent context.Context, remoteClient otlptrace.Client) context.Context {
|
||||
if systemContextFromContext(parent) != nil {
|
||||
panic("parent already contains trace system context")
|
||||
}
|
||||
if op.RemoteClient == nil {
|
||||
op.RemoteClient = NewRemoteClientFromEnv()
|
||||
if remoteClient == nil {
|
||||
panic("remoteClient cannot be nil (use trace.NoopClient instead)")
|
||||
}
|
||||
sys := &systemContext{
|
||||
options: op,
|
||||
tpm: &tracerProviderManager{},
|
||||
options: op,
|
||||
remoteClient: remoteClient,
|
||||
tpm: &tracerProviderManager{},
|
||||
}
|
||||
if op.DebugFlags.Check(TrackSpanReferences) {
|
||||
sys.observer = newSpanObserver()
|
||||
|
@ -52,8 +52,8 @@ func (op Options) NewContext(parent context.Context) context.Context {
|
|||
// The parent context should be context.Background(), or a background context
|
||||
// containing a logger. If any context in the parent's hierarchy was created
|
||||
// by NewContext, this will panic.
|
||||
func NewContext(parent context.Context) context.Context {
|
||||
return Options{}.NewContext(parent)
|
||||
func NewContext(parent context.Context, remoteClient otlptrace.Client) context.Context {
|
||||
return Options{}.NewContext(parent, remoteClient)
|
||||
}
|
||||
|
||||
// NewTracerProvider creates a new [trace.TracerProvider] with the given service
|
||||
|
@ -154,7 +154,7 @@ func ExporterServerFromContext(ctx context.Context) coltracepb.TraceServiceServe
|
|||
|
||||
func RemoteClientFromContext(ctx context.Context) otlptrace.Client {
|
||||
if sys := systemContextFromContext(ctx); sys != nil {
|
||||
return sys.options.RemoteClient
|
||||
return sys.remoteClient
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -178,6 +178,7 @@ var systemContextKey systemContextKeyType
|
|||
|
||||
type systemContext struct {
|
||||
options Options
|
||||
remoteClient otlptrace.Client
|
||||
tpm *tracerProviderManager
|
||||
observer *spanObserver
|
||||
exporterServer *ExporterServer
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue