Refactor trace config to match supported otel options (#5447)

* Refactor trace config to match supported otel options

* use duration instead of int64 for otel timeouts

* change 'trace client updated' log level to debug
This commit is contained in:
Joe Kralicky 2025-01-30 11:59:19 -05:00 committed by GitHub
parent 3e90f1e244
commit 5e94b2f8f1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 1067 additions and 721 deletions

View file

@ -8,8 +8,9 @@ import (
"os"
"strings"
"sync"
"time"
"go.opentelemetry.io/otel"
"github.com/pomerium/pomerium/config/otelconfig"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
@ -23,12 +24,19 @@ var (
ErrClientStopped = errors.New("client is stopped")
)
// SyncClient wraps an underlying [otlptrace.Client] which can be swapped out
// for a different client (e.g. in response to a config update) safely and in
// a way that does not lose spans.
type SyncClient interface {
otlptrace.Client
Update(ctx context.Context, newClient otlptrace.Client) error
}
// NewSyncClient creates a new [SyncClient] with an initial underlying client.
//
// The client can be nil; if so, calling any method on the SyncClient will
// return ErrNoClient.
func NewSyncClient(client otlptrace.Client) SyncClient {
return &syncClient{
client: client,
@ -63,17 +71,24 @@ func (ac *syncClient) Stop(ctx context.Context) error {
if ac.waitForNewClient != nil {
panic("bug: Stop called concurrently")
}
if ac.client == nil {
return ErrNoClient
}
return ac.resetLocked(ctx, nil)
}
func (ac *syncClient) resetLocked(ctx context.Context, newClient otlptrace.Client) error {
if ac.client == nil {
return ErrNoClient
var stop func(context.Context) error
if ac.client != nil {
stop = ac.client.Stop
}
ac.waitForNewClient = make(chan struct{})
ac.mu.Unlock()
err := ac.client.Stop(ctx)
var err error
if stop != nil {
err = stop(ctx)
}
ac.mu.Lock()
close(ac.waitForNewClient)
@ -123,53 +138,70 @@ func (ac *syncClient) Update(ctx context.Context, newClient otlptrace.Client) er
return ac.resetLocked(ctx, newClient)
}
// NewRemoteClientFromEnv creates an otlp trace client using the well-known
// environment variables defined in the [OpenTelemetry documentation].
//
// [OpenTelemetry documentation]: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/
func NewRemoteClientFromEnv() otlptrace.Client {
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
return NoopClient{}
func NewTraceClientFromConfig(opts otelconfig.Config) (otlptrace.Client, error) {
if IsOtelSDKDisabled() {
return NoopClient{}, nil
}
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
if !ok {
exporter = "none"
if opts.OtelTracesExporter == nil {
return NoopClient{}, nil
}
switch strings.ToLower(strings.TrimSpace(exporter)) {
case "none", "noop", "":
return NoopClient{}
switch *opts.OtelTracesExporter {
case "otlp":
var protocol string
if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_TRACES_PROTOCOL"); ok {
protocol = v
} else if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_PROTOCOL"); ok {
protocol = v
var endpoint, protocol string
var signalSpecificEndpoint bool
if opts.OtelExporterOtlpTracesEndpoint != nil {
endpoint = *opts.OtelExporterOtlpTracesEndpoint
signalSpecificEndpoint = true
} else if opts.OtelExporterOtlpEndpoint != nil {
endpoint = *opts.OtelExporterOtlpEndpoint
signalSpecificEndpoint = false
}
if opts.OtelExporterOtlpTracesProtocol != nil {
protocol = *opts.OtelExporterOtlpTracesProtocol
} else if opts.OtelExporterOtlpProtocol != nil {
protocol = *opts.OtelExporterOtlpProtocol
} else {
// try to guess the expected protocol from the port number
var endpoint string
var specific bool
if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"); ok {
endpoint = v
specific = true
} else if v, ok := os.LookupEnv("OTEL_EXPORTER_OTLP_ENDPOINT"); ok {
endpoint = v
protocol = BestEffortProtocolFromOTLPEndpoint(endpoint, signalSpecificEndpoint)
}
var headersList []string
if len(opts.OtelExporterOtlpTracesHeaders) > 0 {
headersList = opts.OtelExporterOtlpTracesHeaders
} else if len(opts.OtelExporterOtlpHeaders) > 0 {
headersList = opts.OtelExporterOtlpHeaders
}
headers := map[string]string{}
for _, kv := range headersList {
k, v, ok := strings.Cut(kv, "=")
if ok {
headers[k] = v
}
protocol = BestEffortProtocolFromOTLPEndpoint(endpoint, specific)
}
defaultTimeout := 10 * time.Second // otel default (not exported)
if opts.OtelExporterOtlpTimeout != nil {
defaultTimeout = max(0, time.Duration(*opts.OtelExporterOtlpTimeout)*time.Millisecond)
}
switch strings.ToLower(strings.TrimSpace(protocol)) {
case "grpc":
return otlptracegrpc.NewClient()
return otlptracegrpc.NewClient(
otlptracegrpc.WithEndpointURL(endpoint),
otlptracegrpc.WithHeaders(headers),
otlptracegrpc.WithTimeout(defaultTimeout),
), nil
case "http/protobuf", "":
return otlptracehttp.NewClient()
return otlptracehttp.NewClient(
otlptracehttp.WithEndpointURL(endpoint),
otlptracehttp.WithHeaders(headers),
otlptracehttp.WithTimeout(defaultTimeout),
), nil
default:
otel.Handle(fmt.Errorf(`unknown otlp trace exporter protocol %q, expected "grpc" or "http/protobuf"`, protocol))
return NoopClient{}
return nil, fmt.Errorf(`unknown otlp trace exporter protocol %q, expected one of ["grpc", "http/protobuf"]`, protocol)
}
case "none", "noop", "":
return NoopClient{}, nil
default:
otel.Handle(fmt.Errorf(`unknown otlp trace exporter %q, expected "otlp" or "none"`, exporter))
return NoopClient{}
return nil, fmt.Errorf(`unknown otlp trace exporter %q, expected one of ["otlp", "none"]`, *opts.OtelTracesExporter)
}
}
@ -258,34 +290,6 @@ func (n ValidNoopSpan) SpanContext() oteltrace.SpanContext {
var _ oteltrace.Span = ValidNoopSpan{}
func IsDisabledViaEnvironment() bool {
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
return true
}
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
if !ok {
return false
}
switch strings.ToLower(strings.TrimSpace(exporter)) {
case "none, noop":
return true
default:
return false
}
}
func IsEnabledViaEnvironment() bool {
if os.Getenv("OTEL_SDK_DISABLED") == "true" {
return false
}
exporter, ok := os.LookupEnv("OTEL_TRACES_EXPORTER")
if !ok {
return false
}
switch strings.ToLower(strings.TrimSpace(exporter)) {
case "none, noop", "":
return false
default:
return true
}
func IsOtelSDKDisabled() bool {
return os.Getenv("OTEL_SDK_DISABLED") == "true"
}

View file

@ -3,12 +3,15 @@ package trace_test
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/trace"
"github.com/pomerium/pomerium/internal/testenv"
@ -16,6 +19,7 @@ import (
"github.com/pomerium/pomerium/internal/testenv/snippets"
. "github.com/pomerium/pomerium/internal/testutil/tracetest" //nolint:revive
"github.com/pomerium/pomerium/internal/testutil/tracetest/mock_otlptrace"
"github.com/pomerium/pomerium/internal/version"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel"
@ -86,6 +90,49 @@ func TestSyncClient(t *testing.T) {
assert.NoError(t, sc.Stop(context.Background()))
})
t.Run("Update from nil client to non-nil client", func(t *testing.T) {
ctrl := gomock.NewController(t)
sc := trace.NewSyncClient(nil)
mockClient := mock_otlptrace.NewMockClient(ctrl)
start := mockClient.EXPECT().
Start(gomock.Any()).
Return(nil)
upload := mockClient.EXPECT().
UploadTraces(gomock.Any(), gomock.Any()).
Return(nil).
After(start)
mockClient.EXPECT().
Stop(gomock.Any()).
Return(nil).
After(upload)
assert.NoError(t, sc.Update(context.Background(), mockClient))
assert.NoError(t, sc.UploadTraces(context.Background(), []*tracev1.ResourceSpans{}))
assert.NoError(t, sc.Stop(context.Background()))
})
t.Run("Update from non-nil client to nil client", func(t *testing.T) {
ctrl := gomock.NewController(t)
sc := trace.NewSyncClient(nil)
{
mockClient := mock_otlptrace.NewMockClient(ctrl)
start := mockClient.EXPECT().
Start(gomock.Any()).
Return(nil)
mockClient.EXPECT().
Stop(gomock.Any()).
Return(nil).
After(start)
assert.NoError(t, sc.Update(context.Background(), mockClient))
}
sc.Update(context.Background(), nil)
assert.ErrorIs(t, sc.UploadTraces(context.Background(), []*tracev1.ResourceSpans{}), trace.ErrNoClient)
})
spinWait := func(counter *atomic.Int32, until int32) error {
startTime := time.Now()
for counter.Load() != until {
@ -257,6 +304,9 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
grpcEndpoint := receiver.GRPCEndpointURL()
httpEndpoint := receiver.HTTPEndpointURL()
emptyConfigFilePath := filepath.Join(env.TempDir(), "empty_config.yaml")
require.NoError(t, os.WriteFile(emptyConfigFilePath, []byte("{}"), 0o644))
env.Start()
snippets.WaitStartupComplete(env)
@ -266,6 +316,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
newClientErr string
uploadErr bool
expectNoSpans bool
expectHeaders map[string][]string
}{
{
name: "GRPC endpoint, auto protocol",
@ -344,7 +395,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
env: map[string]string{
"OTEL_TRACES_EXPORTER": "invalid",
},
newClientErr: `unknown otlp trace exporter "invalid", expected "otlp" or "none"`,
newClientErr: `unknown otlp trace exporter "invalid", expected one of ["otlp", "none"]`,
},
{
name: "invalid protocol",
@ -353,7 +404,7 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": grpcEndpoint.Value(),
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "invalid",
},
newClientErr: `unknown otlp trace exporter protocol "invalid", expected "grpc" or "http/protobuf"`,
newClientErr: `unknown otlp trace exporter protocol "invalid", expected one of ["grpc", "http/protobuf"]`,
},
{
name: "valid configuration, but sdk disabled",
@ -392,25 +443,62 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
"OTEL_EXPORTER_OTLP_ENDPOINT": grpcEndpoint.Value(),
},
},
{
name: "valid exporter, trace headers",
env: map[string]string{
"OTEL_TRACES_EXPORTER": "otlp",
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
"OTEL_EXPORTER_OTLP_TRACES_HEADERS": "foo=bar,bar=baz",
},
expectHeaders: map[string][]string{
"foo": {"bar"},
"bar": {"baz"},
},
},
{
name: "valid exporter, alt headers",
env: map[string]string{
"OTEL_TRACES_EXPORTER": "otlp",
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
"OTEL_EXPORTER_OTLP_HEADERS": "foo=bar,bar=baz",
},
expectHeaders: map[string][]string{
"foo": {"bar"},
"bar": {"baz"},
},
},
{
name: "headers variable precedence",
env: map[string]string{
"OTEL_TRACES_EXPORTER": "otlp",
"OTEL_EXPORTER_OTLP_TRACES_ENDPOINT": httpEndpoint.Value(),
"OTEL_EXPORTER_OTLP_TRACES_PROTOCOL": "http/protobuf",
"OTEL_EXPORTER_OTLP_HEADERS": "a=1,b=2,c=3",
"OTEL_EXPORTER_OTLP_TRACES_HEADERS": "a=2,d=4",
},
expectHeaders: map[string][]string{
"a": {"2"},
"d": {"4"},
},
},
} {
t.Run(tc.name, func(t *testing.T) {
for k, v := range tc.env {
t.Setenv(k, v)
}
handler := &errHandler{}
oldErrHandler := otel.GetErrorHandler()
otel.SetErrorHandler(handler)
t.Cleanup(func() { otel.SetErrorHandler(oldErrHandler) })
remoteClient := trace.NewRemoteClientFromEnv()
ctx := trace.Options{
RemoteClient: remoteClient,
}.NewContext(log.Ctx(env.Context()).WithContext(context.Background()))
cfg, err := config.NewFileOrEnvironmentSource(context.Background(), emptyConfigFilePath, version.FullVersion())
require.NoError(t, err)
remoteClient, err := trace.NewTraceClientFromConfig(cfg.GetConfig().Options.Tracing)
if tc.newClientErr != "" {
assert.ErrorContains(t, handler.err, tc.newClientErr)
assert.ErrorContains(t, err, tc.newClientErr)
return
}
require.NoError(t, err)
ctx := trace.NewContext(log.Ctx(env.Context()).WithContext(context.Background()), remoteClient)
tp := trace.NewTracerProvider(ctx, t.Name())
@ -424,6 +512,11 @@ func TestNewRemoteClientFromEnv(t *testing.T) {
}
assert.NoError(t, trace.ShutdownContext(ctx))
if tc.expectHeaders != nil {
for _, req := range receiver.ReceivedRequests() {
assert.Subset(t, req.Metadata, tc.expectHeaders, "missing expected headers")
}
}
results := NewTraceResults(receiver.FlushResourceSpans())
if tc.expectNoSpans {
results.MatchTraces(t, MatchOptions{Exact: true})

View file

@ -3,6 +3,7 @@ package trace
import (
"context"
"errors"
"fmt"
"net"
"time"
@ -41,7 +42,7 @@ type ExporterServer struct {
func NewServer(ctx context.Context) *ExporterServer {
sys := systemContextFromContext(ctx)
ex := &ExporterServer{
remoteClient: sys.options.RemoteClient,
remoteClient: sys.remoteClient,
observer: sys.observer,
server: grpc.NewServer(grpc.Creds(insecure.NewCredentials())),
}
@ -53,7 +54,9 @@ func (srv *ExporterServer) Start(ctx context.Context) {
lis := bufconn.Listen(2 * 1024 * 1024)
go func() {
if err := srv.remoteClient.Start(ctx); err != nil {
panic(err)
if !errors.Is(err, ErrNoClient) {
panic(fmt.Errorf("bug: %w", err))
}
}
_ = srv.server.Serve(lis)
}()

View file

@ -19,20 +19,20 @@ import (
)
type Options struct {
DebugFlags DebugFlags
RemoteClient otlptrace.Client
DebugFlags DebugFlags
}
func (op Options) NewContext(parent context.Context) context.Context {
func (op Options) NewContext(parent context.Context, remoteClient otlptrace.Client) context.Context {
if systemContextFromContext(parent) != nil {
panic("parent already contains trace system context")
}
if op.RemoteClient == nil {
op.RemoteClient = NewRemoteClientFromEnv()
if remoteClient == nil {
panic("remoteClient cannot be nil (use trace.NoopClient instead)")
}
sys := &systemContext{
options: op,
tpm: &tracerProviderManager{},
options: op,
remoteClient: remoteClient,
tpm: &tracerProviderManager{},
}
if op.DebugFlags.Check(TrackSpanReferences) {
sys.observer = newSpanObserver()
@ -52,8 +52,8 @@ func (op Options) NewContext(parent context.Context) context.Context {
// The parent context should be context.Background(), or a background context
// containing a logger. If any context in the parent's hierarchy was created
// by NewContext, this will panic.
func NewContext(parent context.Context) context.Context {
return Options{}.NewContext(parent)
func NewContext(parent context.Context, remoteClient otlptrace.Client) context.Context {
return Options{}.NewContext(parent, remoteClient)
}
// NewTracerProvider creates a new [trace.TracerProvider] with the given service
@ -154,7 +154,7 @@ func ExporterServerFromContext(ctx context.Context) coltracepb.TraceServiceServe
func RemoteClientFromContext(ctx context.Context) otlptrace.Client {
if sys := systemContextFromContext(ctx); sys != nil {
return sys.options.RemoteClient
return sys.remoteClient
}
return nil
}
@ -178,6 +178,7 @@ var systemContextKey systemContextKeyType
type systemContext struct {
options Options
remoteClient otlptrace.Client
tpm *tracerProviderManager
observer *spanObserver
exporterServer *ExporterServer