mirror of
https://github.com/pomerium/pomerium.git
synced 2025-06-04 11:52:53 +02:00
tracing: adjust envoy otel trace batching settings to match go sdk (#5446)
This commit is contained in:
parent
0bd6d8cc83
commit
6ea51149f9
6 changed files with 59 additions and 17 deletions
|
@ -150,8 +150,8 @@ func (b *Builder) BuildBootstrapDynamicResources(
|
|||
|
||||
// BuildBootstrapLayeredRuntime builds the layered runtime for the envoy bootstrap.
|
||||
func (b *Builder) BuildBootstrapLayeredRuntime(ctx context.Context) (*envoy_config_bootstrap_v3.LayeredRuntime, error) {
|
||||
flushIntervalMs := 5000
|
||||
minFlushSpans := 3
|
||||
flushIntervalMs := trace.BatchSpanProcessorScheduleDelay()
|
||||
minFlushSpans := trace.BatchSpanProcessorMaxExportBatchSize()
|
||||
if trace.DebugFlagsFromContext(ctx).Check(trace.EnvoyFlushEverySpan) {
|
||||
minFlushSpans = 1
|
||||
flushIntervalMs = math.MaxInt32
|
||||
|
@ -166,15 +166,12 @@ func (b *Builder) BuildBootstrapLayeredRuntime(ctx context.Context) (*envoy_conf
|
|||
"tracing": map[string]any{
|
||||
"opentelemetry": map[string]any{
|
||||
"flush_interval_ms": flushIntervalMs,
|
||||
// For most requests, envoy generates 3 spans:
|
||||
// Note: for most requests, envoy generates 3 spans:
|
||||
// - ingress (downstream->envoy)
|
||||
// - ext_authz check request (envoy->pomerium)
|
||||
// - egress (envoy->upstream)
|
||||
// The default value is 5, which usually leads to delayed exports.
|
||||
// This can be set lower, e.g. 1 to have envoy export every span
|
||||
// individually (useful for testing), but 3 is a reasonable default.
|
||||
// If set to 1, also set flush_interval_ms to a very large number to
|
||||
// effectively disable it.
|
||||
// Some requests only generate 2 spans, e.g. if there is no upstream
|
||||
// request made or auth fails.
|
||||
"min_flush_spans": minFlushSpans,
|
||||
},
|
||||
},
|
||||
|
|
|
@ -51,7 +51,7 @@ func TestBuilder_BuildBootstrapLayeredRuntime(t *testing.T) {
|
|||
"tracing": {
|
||||
"opentelemetry": {
|
||||
"flush_interval_ms": 5000,
|
||||
"min_flush_spans": 3
|
||||
"min_flush_spans": 512
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/pomerium/pomerium/config"
|
||||
"github.com/pomerium/pomerium/internal/testenv"
|
||||
"github.com/pomerium/pomerium/internal/testenv/envutil"
|
||||
"github.com/pomerium/pomerium/internal/testenv/scenarios"
|
||||
|
@ -20,16 +21,28 @@ import (
|
|||
// Test parameters. Each one is bound to a command-line flag by init and keeps
// its zero value until the flag package parses the command line.
var (
	// numRoutes is the target of the -routes flag.
	numRoutes int

	// dumpErrLogs, enableTracing and publicRoutes are the targets of the
	// -dump-err-logs, -enable-tracing and -public-routes flags respectively.
	dumpErrLogs, enableTracing, publicRoutes bool
)
|
||||
|
||||
func init() {
|
||||
flag.IntVar(&numRoutes, "routes", 100, "number of routes")
|
||||
flag.BoolVar(&dumpErrLogs, "dump-err-logs", false, "if the test fails, write all captured logs to a file (testdata/<test-name>)")
|
||||
flag.BoolVar(&enableTracing, "enable-tracing", false, "enable tracing")
|
||||
flag.BoolVar(&publicRoutes, "public-routes", false, "use public unauthenticated routes")
|
||||
}
|
||||
|
||||
func TestRequestLatency(t *testing.T) {
|
||||
resume := envutil.PauseProfiling(t)
|
||||
env := testenv.New(t, testenv.Silent())
|
||||
var env testenv.Environment
|
||||
if enableTracing {
|
||||
receiver := scenarios.NewOTLPTraceReceiver()
|
||||
env = testenv.New(t, testenv.Silent(), testenv.WithTraceClient(receiver.NewGRPCClient()))
|
||||
env.Add(receiver)
|
||||
} else {
|
||||
env = testenv.New(t, testenv.Silent())
|
||||
}
|
||||
|
||||
users := []*scenarios.User{}
|
||||
for i := range numRoutes {
|
||||
users = append(users, &scenarios.User{
|
||||
|
@ -47,9 +60,12 @@ func TestRequestLatency(t *testing.T) {
|
|||
routes := make([]testenv.Route, numRoutes)
|
||||
for i := range numRoutes {
|
||||
routes[i] = up.Route().
|
||||
From(env.SubdomainURL(fmt.Sprintf("from-%d", i))).
|
||||
// Policy(func(p *config.Policy) { p.AllowPublicUnauthenticatedAccess = true })
|
||||
PPL(fmt.Sprintf(`{"allow":{"and":["email":{"is":"user%d@example.com"}]}}`, i))
|
||||
From(env.SubdomainURL(fmt.Sprintf("from-%d", i)))
|
||||
if publicRoutes {
|
||||
routes[i] = routes[i].Policy(func(p *config.Policy) { p.AllowPublicUnauthenticatedAccess = true })
|
||||
} else {
|
||||
routes[i] = routes[i].PPL(fmt.Sprintf(`{"allow":{"and":["email":{"is":"user%d@example.com"}]}}`, i))
|
||||
}
|
||||
}
|
||||
env.AddUpstream(up)
|
||||
|
||||
|
|
|
@ -2,9 +2,12 @@ package trace
|
|||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"go.opentelemetry.io/contrib/propagators/autoprop"
|
||||
"go.opentelemetry.io/otel"
|
||||
sdktrace "go.opentelemetry.io/otel/sdk/trace"
|
||||
"go.opentelemetry.io/otel/trace"
|
||||
"go.opentelemetry.io/otel/trace/embedded"
|
||||
)
|
||||
|
@ -44,3 +47,25 @@ var _ trace.Tracer = panicTracer{}
|
|||
func (p panicTracer) Start(context.Context, string, ...trace.SpanStartOption) (context.Context, trace.Span) {
|
||||
panic("global tracer used")
|
||||
}
|
||||
|
||||
// The functions below mimic the functions of the same names in
// go.opentelemetry.io/otel/sdk/internal/env/env.go.
|
||||
|
||||
func BatchSpanProcessorScheduleDelay() int {
|
||||
const defaultValue = sdktrace.DefaultScheduleDelay
|
||||
if v, ok := os.LookupEnv("OTEL_BSP_SCHEDULE_DELAY"); ok {
|
||||
if n, err := strconv.Atoi(v); err == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
||||
func BatchSpanProcessorMaxExportBatchSize() int {
|
||||
const defaultValue = sdktrace.DefaultMaxExportBatchSize
|
||||
if v, ok := os.LookupEnv("OTEL_BSP_MAX_EXPORT_BATCH_SIZE"); ok {
|
||||
if n, err := strconv.Atoi(v); err == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
return defaultValue
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ func NewServer(ctx context.Context) *ExporterServer {
|
|||
}
|
||||
|
||||
func (srv *ExporterServer) Start(ctx context.Context) {
|
||||
lis := bufconn.Listen(4096)
|
||||
lis := bufconn.Listen(2 * 1024 * 1024)
|
||||
go func() {
|
||||
if err := srv.remoteClient.Start(ctx); err != nil {
|
||||
panic(err)
|
||||
|
@ -95,5 +95,6 @@ func (srv *ExporterServer) Shutdown(ctx context.Context) error {
|
|||
if err := srv.remoteClient.Stop(ctx); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
srv.cc.Close()
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
|
|
@ -75,7 +75,10 @@ func (rec *OTLPTraceReceiver) Attach(ctx context.Context) {
|
|||
}
|
||||
|
||||
// Modify implements testenv.Modifier.
|
||||
func (rec *OTLPTraceReceiver) Modify(*config.Config) {}
|
||||
func (rec *OTLPTraceReceiver) Modify(cfg *config.Config) {
|
||||
cfg.Options.TracingProvider = "otlp"
|
||||
cfg.Options.TracingOTLPEndpoint = rec.GRPCEndpointURL().Value()
|
||||
}
|
||||
|
||||
func (rec *OTLPTraceReceiver) handleV1Traces(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Header.Get("Content-Type") != "application/x-protobuf" {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue