Mirror of https://github.com/pomerium/pomerium.git, synced 2025-07-18 09:08:16 +02:00
performance improvements

commit 180c7e04af (parent a6f43f3c3c)
4 changed files with 147 additions and 64 deletions
@@ -9,7 +9,6 @@ import (
     "testing"
     "time"
 
-    "github.com/pomerium/pomerium/config"
     "github.com/pomerium/pomerium/internal/testenv"
     "github.com/pomerium/pomerium/internal/testenv/envutil"
     "github.com/pomerium/pomerium/internal/testenv/scenarios"
@@ -49,8 +48,8 @@ func TestRequestLatency(t *testing.T) {
     for i := range numRoutes {
         routes[i] = up.Route().
             From(env.SubdomainURL(fmt.Sprintf("from-%d", i))).
-            Policy(func(p *config.Policy) { p.AllowPublicUnauthenticatedAccess = true })
-            // PPL(fmt.Sprintf(`{"allow":{"and":["email":{"is":"user%d@example.com"}]}}`, i))
+            // Policy(func(p *config.Policy) { p.AllowPublicUnauthenticatedAccess = true })
+            PPL(fmt.Sprintf(`{"allow":{"and":["email":{"is":"user%d@example.com"}]}}`, i))
     }
     env.AddUpstream(up)
 
@@ -2,6 +2,7 @@ package trace
 
 import (
     "context"
+    "encoding/binary"
     "encoding/json"
     "errors"
     "fmt"
@@ -51,20 +52,26 @@ func SetMaxCachedTraceIDs(num int32) {
     maxCachedTraceIDs.Store(max(num, 0))
 }
 
+type eviction struct {
+    traceID unique.Handle[oteltrace.TraceID]
+    buf     *Buffer
+}
+
 type SpanExportQueue struct {
-    mu     sync.Mutex
-    logger *zerolog.Logger
+    closing  chan struct{}
+    uploadC  chan []*tracev1.ResourceSpans
+    requestC chan *coltracepb.ExportTraceServiceRequest
+    evictC   chan eviction
     client                    otlptrace.Client
     pendingResourcesByTraceID *lru.Cache[unique.Handle[oteltrace.TraceID], *Buffer]
     knownTraceIDMappings      *lru.Cache[unique.Handle[oteltrace.TraceID], unique.Handle[oteltrace.TraceID]]
-    uploadC               chan []*tracev1.ResourceSpans
-    closing               bool
-    closed                chan struct{}
-    debugFlags            DebugFlags
-    debugAllEnqueuedSpans map[oteltrace.SpanID]*tracev1.Span
     tracker     *spanTracker
     observer    *spanObserver
     debugEvents []DebugEvent
+    logger                *zerolog.Logger
+    debugFlags            DebugFlags
+    debugAllEnqueuedSpans map[oteltrace.SpanID]*tracev1.Span
+    wg                    sync.WaitGroup
 }
 
 func NewSpanExportQueue(ctx context.Context, client otlptrace.Client) *SpanExportQueue {
@@ -76,8 +83,10 @@ func NewSpanExportQueue(ctx context.Context, client otlptrace.Client) *SpanExportQueue {
     q := &SpanExportQueue{
         logger:                log.Ctx(ctx),
         client:                client,
+        closing:               make(chan struct{}),
         uploadC:               make(chan []*tracev1.ResourceSpans, 64),
-        closed:                make(chan struct{}),
+        requestC:              make(chan *coltracepb.ExportTraceServiceRequest, 256),
+        evictC:                make(chan eviction, 64),
         debugFlags:            debug,
         debugAllEnqueuedSpans: make(map[oteltrace.SpanID]*tracev1.Span),
         tracker:               newSpanTracker(observer, debug),
@@ -92,12 +101,14 @@ func NewSpanExportQueue(ctx context.Context, client otlptrace.Client) *SpanExportQueue {
     if err != nil {
         panic(err)
     }
+    q.wg.Add(2)
     go q.runUploader()
+    go q.runProcessor()
     return q
 }
 
 func (q *SpanExportQueue) runUploader() {
-    defer close(q.closed)
+    defer q.wg.Done()
     for resourceSpans := range q.uploadC {
         ctx, ca := context.WithTimeout(context.Background(), 10*time.Second)
         if err := q.client.UploadTraces(ctx, resourceSpans); err != nil {
@@ -107,26 +118,34 @@ func (q *SpanExportQueue) runUploader() {
     }
 }
 
-func (q *SpanExportQueue) onEvict(traceID unique.Handle[oteltrace.TraceID], buf *Buffer) {
-    if buf.IsEmpty() {
-        // if the buffer is not empty, it was evicted automatically
-        return
-    } else if mapping, ok := q.knownTraceIDMappings.Get(traceID); ok && mapping == zeroTraceID {
-        q.logger.Debug().
-            Str("traceID", traceID.Value().String()).
-            Msg("dropping unsampled trace")
-        return
+func (q *SpanExportQueue) runProcessor() {
+    defer q.wg.Done()
+    for {
+        select {
+        case req := <-q.requestC:
+            q.processRequestLocked(req)
+        case ev := <-q.evictC:
+            q.processEvictionLocked(ev)
+        case <-q.closing:
+            for {
+                select {
+                case req := <-q.requestC:
+                    q.processRequestLocked(req)
+                case ev := <-q.evictC:
+                    q.processEvictionLocked(ev)
+                default: // all channels empty
+                    close(q.uploadC)
+                    return
+                }
+            }
+        }
     }
+}
 
-    select {
-    case q.uploadC <- buf.Flush():
-        q.logger.Warn().
-            Str("traceID", traceID.Value().String()).
-            Msg("trace export buffer is full, uploading oldest incomplete trace")
-    default:
-        q.logger.Warn().
-            Str("traceID", traceID.Value().String()).
-            Msg("trace export buffer and upload queues are full, dropping trace")
+func (q *SpanExportQueue) onEvict(traceID unique.Handle[oteltrace.TraceID], buf *Buffer) {
+    q.evictC <- eviction{
+        traceID: traceID,
+        buf:     buf,
     }
 }
 
@@ -183,13 +202,17 @@ func (q *SpanExportQueue) isKnownTracePendingLocked(id unique.Handle[oteltrace.TraceID]
 
 var ErrShuttingDown = errors.New("exporter is shutting down")
 
-func (q *SpanExportQueue) Enqueue(ctx context.Context, req *coltracepb.ExportTraceServiceRequest) error {
-    q.mu.Lock()
-    defer q.mu.Unlock()
-    if q.closing {
+func (q *SpanExportQueue) Enqueue(_ context.Context, req *coltracepb.ExportTraceServiceRequest) error {
+    select {
+    case <-q.closing:
         return ErrShuttingDown
+    default:
+        q.requestC <- req
+        return nil
     }
+}
 
+func (q *SpanExportQueue) processRequestLocked(req *coltracepb.ExportTraceServiceRequest) {
     if q.debugFlags.Check(LogAllEvents) {
         q.debugEvents = append(q.debugEvents, DebugEvent{
             Timestamp: time.Now(),
@@ -265,7 +288,7 @@ func (q *SpanExportQueue) Enqueue(ctx context.Context, req *coltracepb.ExportTraceServiceRequest) error {
             tp, err := ParseTraceparent(attr.GetValue().GetStringValue())
             if err != nil {
                 data, _ := protojson.Marshal(span)
-                log.Ctx(ctx).
+                q.logger.
                     Err(err).
                     Str("span", string(data)).
                     Msg("error processing span")
@@ -284,7 +307,7 @@ func (q *SpanExportQueue) Enqueue(ctx context.Context, req *coltracepb.ExportTraceServiceRequest) error {
             value, err := oteltrace.SpanIDFromHex(attr.GetValue().GetStringValue())
             if err != nil {
                 data, _ := protojson.Marshal(span)
-                log.Ctx(ctx).
+                q.logger.
                     Err(err).
                     Str("span", string(data)).
                     Msg("error processing span: invalid value for pomerium.external-parent-span")
@@ -354,7 +377,29 @@ func (q *SpanExportQueue) Enqueue(ctx context.Context, req *coltracepb.ExportTraceServiceRequest) error {
     if resourceSpans := toUpload.Flush(); len(resourceSpans) > 0 {
         q.uploadC <- resourceSpans
     }
-    return nil
+}
+
+func (q *SpanExportQueue) processEvictionLocked(ev eviction) {
+    if ev.buf.IsEmpty() {
+        // if the buffer is not empty, it was evicted automatically
+        return
+    } else if mapping, ok := q.knownTraceIDMappings.Get(ev.traceID); ok && mapping == zeroTraceID {
+        q.logger.Debug().
+            Str("traceID", ev.traceID.Value().String()).
+            Msg("dropping unsampled trace")
+        return
+    }
+
+    select {
+    case q.uploadC <- ev.buf.Flush():
+        q.logger.Warn().
+            Str("traceID", ev.traceID.Value().String()).
+            Msg("trace export buffer is full, uploading oldest incomplete trace")
+    default:
+        q.logger.Warn().
+            Str("traceID", ev.traceID.Value().String()).
+            Msg("trace export buffer and upload queues are full, dropping trace")
+    }
 }
 
 var (
@@ -382,20 +427,17 @@ func (q *SpanExportQueue) WaitForSpans(maxDuration time.Duration) error {
 }
 
 func (q *SpanExportQueue) Close(ctx context.Context) error {
-    q.mu.Lock()
-    q.closing = true
-    close(q.uploadC)
-    q.mu.Unlock()
+    closed := make(chan struct{})
+    go func() {
+        q.wg.Wait()
+        close(closed)
+    }()
+    close(q.closing)
     select {
     case <-ctx.Done():
         log.Ctx(ctx).Error().Msg("exporter stopped before all traces could be exported")
-        // drain uploadC
-        for range q.uploadC {
-        }
         return context.Cause(ctx)
-    case <-q.closed:
-        q.mu.Lock()
-        defer q.mu.Unlock()
+    case <-closed:
         err := q.runOnCloseChecksLocked()
         log.Ctx(ctx).Debug().Err(err).Msg("exporter stopped")
         return err
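For context on the hunks above: the commit replaces the mutex-and-flag shutdown with a channel handshake between Close, the processor, and the uploader. Below is a minimal, self-contained sketch of that handshake; the queue, work, and uploadC names are stand-ins chosen for this example, not the real SpanExportQueue API, and details such as LRU eviction and the OTLP client are omitted.

package main

import (
    "context"
    "fmt"
    "sync"
    "time"
)

// queue is a stand-in for SpanExportQueue: one processor goroutine, one
// uploader goroutine, and a close signal instead of a mutex-guarded flag.
type queue struct {
    closing chan struct{}
    work    chan int
    uploadC chan int
    wg      sync.WaitGroup
}

func newQueue() *queue {
    q := &queue{
        closing: make(chan struct{}),
        work:    make(chan int, 256),
        uploadC: make(chan int, 64),
    }
    q.wg.Add(2)
    go q.runProcessor()
    go q.runUploader()
    return q
}

func (q *queue) runProcessor() {
    defer q.wg.Done()
    for {
        select {
        case v := <-q.work:
            q.uploadC <- v
        case <-q.closing:
            for { // drain what is left, then let the uploader finish
                select {
                case v := <-q.work:
                    q.uploadC <- v
                default:
                    close(q.uploadC)
                    return
                }
            }
        }
    }
}

func (q *queue) runUploader() {
    defer q.wg.Done()
    for v := range q.uploadC {
        fmt.Println("upload", v)
    }
}

// Close mirrors the new shutdown shape: signal, then wait with a deadline.
func (q *queue) Close(ctx context.Context) error {
    done := make(chan struct{})
    go func() { q.wg.Wait(); close(done) }()
    close(q.closing)
    select {
    case <-ctx.Done():
        return context.Cause(ctx)
    case <-done:
        return nil
    }
}

func main() {
    q := newQueue()
    for i := 0; i < 10; i++ {
        q.work <- i
    }
    ctx, cancel := context.WithTimeout(context.Background(), time.Second)
    defer cancel()
    fmt.Println("close:", q.Close(ctx))
}

The design keeps all queue state inside the processor goroutine, so Enqueue and the eviction callback only need channel sends instead of taking a lock.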
@@ -595,19 +637,51 @@ func (e *DebugEvent) UnmarshalJSON(b []byte) error {
     return nil
 }
 
+const shardCount = 64
+
+type (
+    shardedSet   [shardCount]map[oteltrace.SpanID]struct{}
+    shardedLocks [shardCount]sync.Mutex
+)
+
+func (s *shardedSet) Range(f func(key oteltrace.SpanID)) {
+    for i := range shardCount {
+        for k := range s[i] {
+            f(k)
+        }
+    }
+}
+
+func (s *shardedLocks) LockAll() {
+    for i := range shardCount {
+        s[i].Lock()
+    }
+}
+
+func (s *shardedLocks) UnlockAll() {
+    for i := range shardCount {
+        s[i].Unlock()
+    }
+}
+
 type spanTracker struct {
-    inflightSpans sync.Map
-    allSpans      sync.Map
-    debugFlags    DebugFlags
-    observer      *spanObserver
-    shutdownOnce  sync.Once
+    inflightSpansMu shardedLocks
+    inflightSpans   shardedSet
+    allSpans        sync.Map
+    debugFlags      DebugFlags
+    observer        *spanObserver
+    shutdownOnce    sync.Once
 }
 
 func newSpanTracker(observer *spanObserver, debugFlags DebugFlags) *spanTracker {
-    return &spanTracker{
+    st := &spanTracker{
        observer:   observer,
        debugFlags: debugFlags,
     }
+    for i := range len(st.inflightSpans) {
+        st.inflightSpans[i] = make(map[oteltrace.SpanID]struct{})
+    }
+    return st
 }
 
 type spanInfo struct {
@@ -626,13 +700,20 @@ func (t *spanTracker) ForceFlush(context.Context) error {
 // OnEnd implements trace.SpanProcessor.
 func (t *spanTracker) OnEnd(s sdktrace.ReadOnlySpan) {
     id := s.SpanContext().SpanID()
-    t.inflightSpans.Delete(id)
+    bucket := binary.BigEndian.Uint64(id[:]) % shardCount
+    t.inflightSpansMu[bucket].Lock()
+    defer t.inflightSpansMu[bucket].Unlock()
+    delete(t.inflightSpans[bucket], id)
 }
 
 // OnStart implements trace.SpanProcessor.
 func (t *spanTracker) OnStart(_ context.Context, s sdktrace.ReadWriteSpan) {
     id := s.SpanContext().SpanID()
-    t.inflightSpans.Store(id, struct{}{})
+    bucket := binary.BigEndian.Uint64(id[:]) % shardCount
+    t.inflightSpansMu[bucket].Lock()
+    defer t.inflightSpansMu[bucket].Unlock()
+    t.inflightSpans[bucket][id] = struct{}{}
+
     if t.debugFlags.Check(TrackSpanReferences) {
         t.observer.Observe(id)
     }
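The two hunks above replace the sync.Map of inflight spans with a sharded set. A standalone sketch of that pattern follows; spanID is a stand-in for oteltrace.SpanID so the example compiles without the OpenTelemetry dependency, and Add/Delete are analogues of OnStart/OnEnd rather than the tracker's real methods.

package main

import (
    "encoding/binary"
    "fmt"
    "sync"
)

const shardCount = 64 // same shard count the diff introduces

// spanID stands in for oteltrace.SpanID (an 8-byte array).
type spanID [8]byte

// shardedSet spreads keys across shardCount small maps, each guarded by its
// own mutex, so concurrent insertions and deletions rarely share a lock.
type shardedSet struct {
    mu    [shardCount]sync.Mutex
    items [shardCount]map[spanID]struct{}
}

func newShardedSet() *shardedSet {
    s := &shardedSet{}
    for i := range s.items {
        s.items[i] = make(map[spanID]struct{})
    }
    return s
}

// bucket picks a shard from the ID bytes, mirroring
// binary.BigEndian.Uint64(id[:]) % shardCount in the diff.
func bucket(id spanID) uint64 {
    return binary.BigEndian.Uint64(id[:]) % shardCount
}

// Add records an inflight span (the OnStart analogue).
func (s *shardedSet) Add(id spanID) {
    b := bucket(id)
    s.mu[b].Lock()
    defer s.mu[b].Unlock()
    s.items[b][id] = struct{}{}
}

// Delete removes a finished span (the OnEnd analogue).
func (s *shardedSet) Delete(id spanID) {
    b := bucket(id)
    s.mu[b].Lock()
    defer s.mu[b].Unlock()
    delete(s.items[b], id)
}

func main() {
    s := newShardedSet()
    var wg sync.WaitGroup
    for i := 0; i < 1000; i++ {
        wg.Add(1)
        go func(i int) {
            defer wg.Done()
            var id spanID
            binary.BigEndian.PutUint64(id[:], uint64(i))
            s.Add(id)
            s.Delete(id)
        }(i)
    }
    wg.Wait()
    fmt.Println("tracked and released 1000 span IDs")
}

Spreading the IDs over 64 independently locked maps keeps span start and end callbacks from serializing on a single lock under load, which is presumably where much of the performance win comes from.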
@@ -664,12 +745,13 @@ func (t *spanTracker) Shutdown(_ context.Context) error {
     if t.debugFlags.Check(WarnOnIncompleteSpans) {
         if t.debugFlags.Check(TrackAllSpans) {
             incompleteSpans := []*spanInfo{}
-            t.inflightSpans.Range(func(key, _ any) bool {
+            t.inflightSpansMu.LockAll()
+            t.inflightSpans.Range(func(key oteltrace.SpanID) {
                 if info, ok := t.allSpans.Load(key); ok {
                     incompleteSpans = append(incompleteSpans, info.(*spanInfo))
                 }
-                return true
             })
+            t.inflightSpansMu.UnlockAll()
             if len(incompleteSpans) > 0 {
                 didWarn = true
                 msg := startMsg("WARNING: spans not ended:\n")
@@ -689,10 +771,11 @@ func (t *spanTracker) Shutdown(_ context.Context) error {
             }
         } else {
             incompleteSpans := []oteltrace.SpanID{}
-            t.inflightSpans.Range(func(key, _ any) bool {
-                incompleteSpans = append(incompleteSpans, key.(oteltrace.SpanID))
-                return true
+            t.inflightSpansMu.LockAll()
+            t.inflightSpans.Range(func(key oteltrace.SpanID) {
+                incompleteSpans = append(incompleteSpans, key)
             })
+            t.inflightSpansMu.UnlockAll()
             if len(incompleteSpans) > 0 {
                 didWarn = true
                 msg := startMsg("WARNING: spans not ended:\n")
@@ -53,10 +53,11 @@ func (obs *spanObserver) XObservedIDs() []oteltrace.SpanID {
 
 func (t *spanTracker) XInflightSpans() []oteltrace.SpanID {
     ids := []oteltrace.SpanID{}
-    t.inflightSpans.Range(func(key, _ any) bool {
-        ids = append(ids, key.(oteltrace.SpanID))
-        return true
+    t.inflightSpansMu.LockAll()
+    t.inflightSpans.Range(func(key oteltrace.SpanID) {
+        ids = append(ids, key)
     })
+    t.inflightSpansMu.UnlockAll()
     slices.SortFunc(ids, func(a, b oteltrace.SpanID) int {
         return cmp.Compare(a.String(), b.String())
     })
@@ -303,7 +303,7 @@ func WithTraceClient(traceClient otlptrace.Client) EnvironmentOption {
 
 var setGrpcLoggerOnce sync.Once
 
-const defaultTraceDebugFlags = trace.TrackSpanCallers
+const defaultTraceDebugFlags = trace.TrackSpanCallers | trace.TrackSpanReferences
 
 var (
     flagDebug = flag.Bool("env.debug", false, "enables test environment debug logging (equivalent to Debug() option)")