core: more metrics (#5629)

## Summary
Add some more metrics:

- Authenticate token verification
- Authorization log duration
- Authorization evaluator and header evaluator
- IDP token session creator

HTTP and gRPC endpoints are already instrumented via middleware, which
covers authenticate, proxy and databroker endpoints. Postgres is also
already instrumented using `otelpgx`.

## Related issues
-
[ENG-2407](https://linear.app/pomerium/issue/ENG-2407/add-additional-metrics-and-tracing-spans-to-pomerium)


## Checklist

- [x] reference any related issues
- [ ] updated unit tests
- [ ] add appropriate label (`enhancement`, `bug`, `breaking`,
`dependencies`, `ci`)
- [x] ready for review
This commit is contained in:
Caleb Doxsey 2025-05-29 09:34:41 -06:00 committed by GitHub
parent 957e0982c1
commit 13554ec78d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 164 additions and 2 deletions

View file

@ -7,11 +7,13 @@ import (
"errors"
"fmt"
"go.opentelemetry.io/otel/metric"
oteltrace "go.opentelemetry.io/otel/trace"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/internal/atomicutil"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/pkg/cryptutil"
"github.com/pomerium/pomerium/pkg/telemetry/trace"
)
@ -41,6 +43,15 @@ func ValidateOptions(o *config.Options) error {
// Authenticate contains data required to run the authenticate service.
type Authenticate struct {
accessTokenVerificationCount metric.Int64Counter
accessTokenValidVerificationCount metric.Int64Counter
accessTokenInvalidVerificationCount metric.Int64Counter
accessTokenVerificationDuration metric.Int64Histogram
identityTokenVerificationCount metric.Int64Counter
identityTokenValidVerificationCount metric.Int64Counter
identityTokenInvalidVerificationCount metric.Int64Counter
identityTokenVerificationDuration metric.Int64Histogram
cfg *authenticateConfig
options *atomicutil.Value[*config.Options]
state *atomicutil.Value[*authenticateState]
@ -56,6 +67,31 @@ func New(ctx context.Context, cfg *config.Config, options ...Option) (*Authentic
tracer := tracerProvider.Tracer(trace.PomeriumCoreTracer)
a := &Authenticate{
accessTokenVerificationCount: metrics.Int64Counter("authenticate.idp_access_token.verifications",
metric.WithDescription("Number of IDP access token verifications."),
metric.WithUnit("{verification}")),
accessTokenValidVerificationCount: metrics.Int64Counter("authenticate.idp_access_token.valid_verifications",
metric.WithDescription("Number of valid IDP access token verifications."),
metric.WithUnit("{verification}")),
accessTokenInvalidVerificationCount: metrics.Int64Counter("authenticate.idp_access_token.invalid_verifications",
metric.WithDescription("Number of invalid IDP access token verifications."),
metric.WithUnit("{verification}")),
accessTokenVerificationDuration: metrics.Int64Histogram("authenticate.idp_access_token.verification.duration",
metric.WithDescription("Duration of access token verification."),
metric.WithUnit("ms")),
identityTokenVerificationCount: metrics.Int64Counter("authenticate.idp_identity_token.verifications",
metric.WithDescription("Number of IDP identity token verifications."),
metric.WithUnit("{verification}")),
identityTokenValidVerificationCount: metrics.Int64Counter("authenticate.idp_identity_token.valid_verifications",
metric.WithDescription("Number of valid IDP identity token verifications."),
metric.WithUnit("{verification}")),
identityTokenInvalidVerificationCount: metrics.Int64Counter("authenticate.idp_identity_token.invalid_verifications",
metric.WithDescription("Number of invalid IDP identity token verifications."),
metric.WithUnit("{verification}")),
identityTokenVerificationDuration: metrics.Int64Histogram("authenticate.idp_identity_token.verification.duration",
metric.WithDescription("Duration of identity token verification."),
metric.WithUnit("ms")),
cfg: authenticateConfig,
options: config.NewAtomicOptions(),
state: atomicutil.NewValue(newAuthenticateState()),

View file

@ -3,6 +3,7 @@ package authenticate
import (
"encoding/json"
"net/http"
"time"
"github.com/pomerium/pomerium/internal/httputil"
"github.com/pomerium/pomerium/internal/log"
@ -10,6 +11,10 @@ import (
)
func (a *Authenticate) verifyAccessToken(w http.ResponseWriter, r *http.Request) error {
start := time.Now()
a.accessTokenVerificationCount.Add(r.Context(), 1)
var req authenticateapi.VerifyAccessTokenRequest
err := json.NewDecoder(r.Body).Decode(&req)
if err != nil {
@ -24,9 +29,11 @@ func (a *Authenticate) verifyAccessToken(w http.ResponseWriter, r *http.Request)
var res authenticateapi.VerifyTokenResponse
claims, err := authenticator.VerifyAccessToken(r.Context(), req.AccessToken)
if err == nil {
a.accessTokenValidVerificationCount.Add(r.Context(), 1)
res.Valid = true
res.Claims = claims
} else {
a.accessTokenInvalidVerificationCount.Add(r.Context(), 1)
res.Valid = false
log.Ctx(r.Context()).Info().
Err(err).
@ -39,10 +46,16 @@ func (a *Authenticate) verifyAccessToken(w http.ResponseWriter, r *http.Request)
return err
}
a.accessTokenVerificationDuration.Record(r.Context(), time.Since(start).Milliseconds())
return nil
}
func (a *Authenticate) verifyIdentityToken(w http.ResponseWriter, r *http.Request) error {
start := time.Now()
a.identityTokenVerificationCount.Add(r.Context(), 1)
var req authenticateapi.VerifyIdentityTokenRequest
err := json.NewDecoder(r.Body).Decode(&req)
if err != nil {
@ -57,9 +70,11 @@ func (a *Authenticate) verifyIdentityToken(w http.ResponseWriter, r *http.Reques
var res authenticateapi.VerifyTokenResponse
claims, err := authenticator.VerifyIdentityToken(r.Context(), req.IdentityToken)
if err == nil {
a.identityTokenValidVerificationCount.Add(r.Context(), 1)
res.Valid = true
res.Claims = claims
} else {
a.identityTokenInvalidVerificationCount.Add(r.Context(), 1)
res.Valid = false
log.Ctx(r.Context()).Info().
Err(err).
@ -72,5 +87,7 @@ func (a *Authenticate) verifyIdentityToken(w http.ResponseWriter, r *http.Reques
return err
}
a.identityTokenVerificationDuration.Record(r.Context(), time.Since(start).Milliseconds())
return nil
}

View file

@ -8,6 +8,7 @@ import (
"slices"
"github.com/rs/zerolog"
"go.opentelemetry.io/otel/metric"
oteltrace "go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
@ -24,6 +25,8 @@ import (
// Authorize struct holds
type Authorize struct {
logDuration metric.Int64Histogram
state *atomicutil.Value[*authorizeState]
store *store.Store
currentConfig *atomicutil.Value[*config.Config]
@ -39,6 +42,10 @@ func New(ctx context.Context, cfg *config.Config) (*Authorize, error) {
tracer := tracerProvider.Tracer(trace.PomeriumCoreTracer)
a := &Authorize{
logDuration: metrics.Int64Histogram("authorize.log.duration",
metric.WithDescription("Duration of authorize log execution."),
metric.WithUnit("ms")),
currentConfig: atomicutil.NewValue(cfg),
store: store.New(),
tracerProvider: tracerProvider,

View file

@ -15,6 +15,7 @@ import (
"github.com/go-jose/go-jose/v3"
"github.com/hashicorp/go-set/v3"
"github.com/open-policy-agent/opa/rego"
"go.opentelemetry.io/otel/metric"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/types/known/structpb"
@ -24,6 +25,7 @@ import (
"github.com/pomerium/pomerium/internal/errgrouputil"
"github.com/pomerium/pomerium/internal/httputil"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/pkg/contextutil"
"github.com/pomerium/pomerium/pkg/cryptutil"
"github.com/pomerium/pomerium/pkg/policy/criteria"
@ -142,6 +144,9 @@ type Result struct {
// An Evaluator evaluates policies.
type Evaluator struct {
evaluationCount, allowCount, denyCount metric.Int64Counter
evaluationDuration metric.Int64Histogram
store *store.Store
policyEvaluators map[string]*PolicyEvaluator
headersEvaluators *HeadersEvaluator
@ -164,6 +169,19 @@ func New(
}
e := &Evaluator{
evaluationCount: metrics.Int64Counter("authorize.evaluator.evaluations",
metric.WithDescription("Number of evaluations."),
metric.WithUnit("{evaluation}")),
allowCount: metrics.Int64Counter("authorize.evaluator.allowals",
metric.WithDescription("Number of allowals."),
metric.WithUnit("{allowal}")),
denyCount: metrics.Int64Counter("authorize.evaluator.denials",
metric.WithDescription("Number of denials."),
metric.WithUnit("{denial}")),
evaluationDuration: metrics.Int64Histogram("authorize.evaluator.evaluation.duration",
metric.WithDescription("Duration of evaluation."),
metric.WithUnit("ms")),
store: store,
clientCA: cfg.ClientCA,
clientCRL: cfg.ClientCRL,
@ -252,6 +270,8 @@ func (e *Evaluator) Evaluate(ctx context.Context, req *Request) (*Result, error)
ctx, span := trace.Continue(ctx, "authorize.Evaluator.Evaluate")
defer span.End()
start := time.Now()
eg, ctx := errgroup.WithContext(ctx)
var policyOutput *PolicyResponse
@ -277,6 +297,14 @@ func (e *Evaluator) Evaluate(ctx context.Context, req *Request) (*Result, error)
return nil, err
}
e.evaluationCount.Add(ctx, 1)
if policyOutput.Deny.Value {
e.denyCount.Add(ctx, 1)
} else if policyOutput.Allow.Value {
e.allowCount.Add(ctx, 1)
}
e.evaluationDuration.Record(ctx, time.Since(start).Microseconds())
res := &Result{
Allow: policyOutput.Allow,
Deny: policyOutput.Deny,

View file

@ -6,9 +6,11 @@ import (
"time"
"github.com/open-policy-agent/opa/rego"
"go.opentelemetry.io/otel/metric"
"github.com/pomerium/pomerium/authorize/internal/store"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/pkg/telemetry/trace"
)
@ -20,12 +22,22 @@ type HeadersResponse struct {
// A HeadersEvaluator evaluates the headers.rego script.
type HeadersEvaluator struct {
evaluationCount metric.Int64Counter
evaluationDuration metric.Int64Histogram
store *store.Store
}
// NewHeadersEvaluator creates a new HeadersEvaluator.
func NewHeadersEvaluator(store *store.Store) *HeadersEvaluator {
return &HeadersEvaluator{
evaluationCount: metrics.Int64Counter("authorize.header_evaluator.evaluations",
metric.WithDescription("Number of header evaluations."),
metric.WithUnit("{evaluation}")),
evaluationDuration: metrics.Int64Histogram("authorize.header_evaluator.evaluation.duration",
metric.WithDescription("Duration of header evaluation."),
metric.WithUnit("ms")),
store: store,
}
}
@ -35,6 +47,9 @@ func (e *HeadersEvaluator) Evaluate(ctx context.Context, req *Request, options .
ctx, span := trace.Continue(ctx, "authorize.HeadersEvaluator.Evaluate")
defer span.End()
e.evaluationCount.Add(ctx, 1)
start := time.Now()
ectx := new(rego.EvalContext)
for _, option := range options {
option(ectx)
@ -43,5 +58,7 @@ func (e *HeadersEvaluator) Evaluate(ctx context.Context, req *Request, options .
if now.IsZero() {
now = time.Now()
}
return newHeadersEvaluatorEvaluation(e, req, now).execute(ctx)
res, err := newHeadersEvaluatorEvaluation(e, req, now).execute(ctx)
e.evaluationDuration.Record(ctx, time.Since(start).Milliseconds())
return res, err
}

View file

@ -2,6 +2,7 @@ package authorize
import (
"context"
"time"
"github.com/go-jose/go-jose/v3/jwt"
"github.com/rs/zerolog"
@ -25,6 +26,8 @@ func (a *Authorize) logAuthorizeCheck(
ctx, span := a.tracer.Start(ctx, "authorize.grpc.LogAuthorizeCheck")
defer span.End()
start := time.Now()
hdrs := req.HTTP.Headers
impersonateDetails := a.getImpersonateDetails(ctx, s)
@ -59,6 +62,7 @@ func (a *Authorize) logAuthorizeCheck(
}
evt.Msg("authorize check")
a.logDuration.Record(ctx, time.Since(start).Milliseconds())
}
type impersonateDetails struct {

View file

@ -9,6 +9,7 @@ import (
"time"
"github.com/google/uuid"
"go.opentelemetry.io/otel/metric"
"golang.org/x/sync/singleflight"
"google.golang.org/protobuf/types/known/timestamppb"
@ -20,6 +21,7 @@ import (
"github.com/pomerium/pomerium/internal/sessions/cookie"
"github.com/pomerium/pomerium/internal/sessions/header"
"github.com/pomerium/pomerium/internal/sessions/queryparam"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/internal/urlutil"
"github.com/pomerium/pomerium/pkg/authenticateapi"
"github.com/pomerium/pomerium/pkg/grpc/databroker"
@ -137,6 +139,13 @@ type IncomingIDPTokenSessionCreator interface {
}
type incomingIDPTokenSessionCreator struct {
accessTokenSessionsCreatedCount metric.Int64Counter
accessTokenSessionsCachedCount metric.Int64Counter
accessTokenCreateSessionDuration metric.Int64Histogram
identityTokenSessionsCreatedCount metric.Int64Counter
identityTokenSessionsCachedCount metric.Int64Counter
identityTokenCreateSessionDuration metric.Int64Histogram
timeNow func() time.Time
getRecord func(ctx context.Context, recordType, recordID string) (*databroker.Record, error)
putRecords func(ctx context.Context, records []*databroker.Record) error
@ -147,7 +156,30 @@ func NewIncomingIDPTokenSessionCreator(
getRecord func(ctx context.Context, recordType, recordID string) (*databroker.Record, error),
putRecords func(ctx context.Context, records []*databroker.Record) error,
) IncomingIDPTokenSessionCreator {
return &incomingIDPTokenSessionCreator{timeNow: time.Now, getRecord: getRecord, putRecords: putRecords}
return &incomingIDPTokenSessionCreator{
accessTokenSessionsCreatedCount: metrics.Int64Counter("config.idp_token_session_creator.access_token.sessions_created",
metric.WithDescription("Number of sessions created from IDP access tokens."),
metric.WithUnit("{session}")),
accessTokenSessionsCachedCount: metrics.Int64Counter("config.idp_token_session_creator.access_token.sessions_cached",
metric.WithDescription("Number of sessions cached from IDP access tokens."),
metric.WithUnit("{session}")),
accessTokenCreateSessionDuration: metrics.Int64Histogram("config.idp_token_session_creator.access_token.create_session.duration",
metric.WithDescription("Duration of create session from IDP access tokens."),
metric.WithUnit("ms")),
identityTokenSessionsCreatedCount: metrics.Int64Counter("config.idp_token_session_creator.identity_token.sessions_created",
metric.WithDescription("Number of sessions created from IDP identity tokens."),
metric.WithUnit("{session}")),
identityTokenSessionsCachedCount: metrics.Int64Counter("config.idp_token_session_creator.identity_token.sessions_cached",
metric.WithDescription("Number of sessions cached from IDP identity tokens."),
metric.WithUnit("{session}")),
identityTokenCreateSessionDuration: metrics.Int64Histogram("config.idp_token_session_creator.identity_token.create_session.duration",
metric.WithDescription("Duration of create session from IDP identity tokens."),
metric.WithUnit("ms")),
timeNow: time.Now,
getRecord: getRecord,
putRecords: putRecords,
}
}
// CreateSession attempts to create a session for incoming idp access and
@ -176,6 +208,8 @@ func (c *incomingIDPTokenSessionCreator) createSessionAccessToken(
policy *Policy,
rawAccessToken string,
) (*session.Session, error) {
start := time.Now()
idp, err := cfg.Options.GetIdentityProviderForPolicy(policy)
if err != nil {
return nil, fmt.Errorf("error getting identity provider to verify access token: %w", err)
@ -185,6 +219,7 @@ func (c *incomingIDPTokenSessionCreator) createSessionAccessToken(
res, err, _ := c.singleflight.Do(sessionID, func() (any, error) {
s, err := c.getSession(ctx, sessionID)
if err == nil {
c.accessTokenSessionsCachedCount.Add(ctx, 1)
return s, nil
} else if !storage.IsNotFound(err) {
return nil, err
@ -225,11 +260,14 @@ func (c *incomingIDPTokenSessionCreator) createSessionAccessToken(
return nil, fmt.Errorf("error saving session and user: %w", err)
}
c.accessTokenSessionsCreatedCount.Add(ctx, 1)
return s, nil
})
if err != nil {
return nil, err
}
c.accessTokenCreateSessionDuration.Record(ctx, time.Since(start).Milliseconds())
return res.(*session.Session), nil
}
@ -239,6 +277,8 @@ func (c *incomingIDPTokenSessionCreator) createSessionForIdentityToken(
policy *Policy,
rawIdentityToken string,
) (*session.Session, error) {
start := time.Now()
idp, err := cfg.Options.GetIdentityProviderForPolicy(policy)
if err != nil {
return nil, fmt.Errorf("error getting identity provider to verify identity token: %w", err)
@ -248,6 +288,7 @@ func (c *incomingIDPTokenSessionCreator) createSessionForIdentityToken(
res, err, _ := c.singleflight.Do(sessionID, func() (any, error) {
s, err := c.getSession(ctx, sessionID)
if err == nil {
c.identityTokenSessionsCachedCount.Add(ctx, 1)
return s, nil
} else if !storage.IsNotFound(err) {
return nil, err
@ -284,11 +325,14 @@ func (c *incomingIDPTokenSessionCreator) createSessionForIdentityToken(
return nil, fmt.Errorf("error saving session and user: %w", err)
}
c.identityTokenSessionsCreatedCount.Add(ctx, 1)
return s, nil
})
if err != nil {
return nil, err
}
c.identityTokenCreateSessionDuration.Record(ctx, time.Since(start).Milliseconds())
return res.(*session.Session), nil
}

View file

@ -31,3 +31,12 @@ func Int64Counter(name string, options ...metric.Int64CounterOption) metric.Int6
}
return c
}
// Int64Histogram returns an int64 histogram.
func Int64Histogram(name string, options ...metric.Int64HistogramOption) metric.Int64Histogram {
c, err := Meter.Int64Histogram(name, options...)
if err != nil {
panic(err)
}
return c
}