diff --git a/internal/telemetry/metrics/pgxpool.go b/internal/telemetry/metrics/pgxpool.go
new file mode 100644
index 000000000..623a8c165
--- /dev/null
+++ b/internal/telemetry/metrics/pgxpool.go
@@ -0,0 +1,172 @@
+package metrics
+
+import (
+	"sync"
+	"time"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+	"go.opencensus.io/metric"
+
+	"github.com/pomerium/pomerium/pkg/metrics"
+)
+
+// Derived gauges for the pgxpool statistics. They are assigned by
+// registerPgxpoolStatMetrics and bound to a pool by ConnectPgxpoolStatMetrics.
+var (
+	pgxpoolAcquireCount            *metric.Int64DerivedGauge
+	pgxpoolAcquireDurationSeconds  *metric.Float64DerivedGauge
+	pgxpoolAcquiredConns           *metric.Int64DerivedGauge
+	pgxpoolCanceledAcquireCount    *metric.Int64DerivedGauge
+	pgxpoolConstructingConns       *metric.Int64DerivedGauge
+	pgxpoolEmptyAcquireCount       *metric.Int64DerivedGauge
+	pgxpoolIdleConns               *metric.Int64DerivedGauge
+	pgxpoolMaxConns                *metric.Int64DerivedGauge
+	pgxpoolMaxIdleDestroyCount     *metric.Int64DerivedGauge
+	pgxpoolMaxLifetimeDestroyCount *metric.Int64DerivedGauge
+	pgxpoolNewConnsCount           *metric.Int64DerivedGauge
+)
+
+// registerPgxpoolStatMetrics adds one derived gauge per pgxpool statistic to
+// the given registry and stores each gauge in its package-level variable.
+func registerPgxpoolStatMetrics(registry *metric.Registry) {
+	pgxpoolAcquireCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolAcquireCount,
+		metric.WithDescription("Cumulative count of successful acquires from the current database connection pool."),
+	)
+	pgxpoolAcquireDurationSeconds, _ = registry.AddFloat64DerivedGauge(
+		metrics.PgxpoolAcquireDurationSeconds,
+		metric.WithDescription("Total duration of all successful acquires from the current database connection pool."),
+	)
+	pgxpoolAcquiredConns, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolAcquiredConns,
+		metric.WithDescription("Number of currently acquired connections in the current database connection pool."),
+	)
+	pgxpoolCanceledAcquireCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolCanceledAcquireCount,
+		metric.WithDescription("Cumulative count of acquires from the current database connection pool that were canceled by a context."),
+	)
+	pgxpoolConstructingConns, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolConstructingConns,
+		metric.WithDescription("Number of connections with construction in progress in the current database connection pool."),
+	)
+	pgxpoolEmptyAcquireCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolEmptyAcquireCount,
+		metric.WithDescription("Cumulative count of successful acquires from the current database connection pool that waited for a resource to be released or constructed because the pool was empty."),
+	)
+	pgxpoolIdleConns, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolIdleConns,
+		metric.WithDescription("Number of currently idle connections in the current database connection pool."),
+	)
+	pgxpoolMaxConns, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolMaxConns,
+		metric.WithDescription("Maximum size of the current database connection pool."),
+	)
+	pgxpoolMaxIdleDestroyCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolMaxIdleDestroyCount,
+		metric.WithDescription("Cumulative count of database connections destroyed by the current database connection pool because they exceeded the MaxConnIdleTime."),
+	)
+	pgxpoolMaxLifetimeDestroyCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolMaxLifetimeDestroyCount,
+		metric.WithDescription("Cumulative count of database connections destroyed by the current database connection pool because they exceeded the MaxConnLifetime."),
+	)
+	pgxpoolNewConnsCount, _ = registry.AddInt64DerivedGauge(
+		metrics.PgxpoolNewConnsCount,
+		metric.WithDescription("Cumulative count of new database connections opened by the current database connection pool."),
+	)
+}
+
+// ConnectPgxpoolStatMetrics binds the pgxpool gauges to the given pool, so that
+// subsequent reads of the gauges report statistics taken from that pool.
+func ConnectPgxpoolStatMetrics(pool *pgxpool.Pool) {
+	w := &pgxpoolStatsWrapper{pool: pool}
+	w.connect()
+}
+
+// pgxpoolStatsWrapper adapts the statistics of a pool to gauge callbacks. All
+// callbacks share one cached snapshot; mu guards cached and timestamp, where
+// timestamp is the time at which cached was taken.
+type pgxpoolStatsWrapper struct {
+	pool      *pgxpool.Pool
+	mu        sync.Mutex
+	cached    *pgxpool.Stat
+	timestamp time.Time
+}
+
+// stats returns the cached stats snapshot, taking a new one from the pool when
+// there is none yet or the cached one is older than cacheInterval.
+func (w *pgxpoolStatsWrapper) stats() *pgxpool.Stat {
+	// Don't request a new stats snapshot more often than this interval.
+	const cacheInterval = 5 * time.Second
+
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	if w.cached == nil || time.Since(w.timestamp) > cacheInterval {
+		w.cached = w.pool.Stat()
+		// Record when the snapshot was taken; otherwise timestamp keeps its
+		// zero value and every call would request a new snapshot.
+		w.timestamp = time.Now()
+	}
+
+	return w.cached
+}
+
+func (w *pgxpoolStatsWrapper) acquireCount() int64 {
+	return w.stats().AcquireCount()
+}
+
+func (w *pgxpoolStatsWrapper) acquireDurationSeconds() float64 {
+	return w.stats().AcquireDuration().Seconds()
+}
+
+func (w *pgxpoolStatsWrapper) acquiredConns() int64 {
+	return int64(w.stats().AcquiredConns())
+}
+
+func (w *pgxpoolStatsWrapper) canceledAcquireCount() int64 {
+	return w.stats().CanceledAcquireCount()
+}
+
+func (w *pgxpoolStatsWrapper) constructingConns() int64 {
+	return int64(w.stats().ConstructingConns())
+}
+
+func (w *pgxpoolStatsWrapper) emptyAcquireCount() int64 {
+	return w.stats().EmptyAcquireCount()
+}
+
+func (w *pgxpoolStatsWrapper) idleConns() int64 {
+	return int64(w.stats().IdleConns())
+}
+
+func (w *pgxpoolStatsWrapper) maxConns() int64 {
+	return int64(w.stats().MaxConns())
+}
+
+func (w *pgxpoolStatsWrapper) maxIdleDestroyCount() int64 {
+	return w.stats().MaxIdleDestroyCount()
+}
+
+func (w *pgxpoolStatsWrapper) maxLifetimeDestroyCount() int64 {
+	return w.stats().MaxLifetimeDestroyCount()
+}
+
+func (w *pgxpoolStatsWrapper) newConnsCount() int64 {
+	return w.stats().NewConnsCount()
+}
+
+// connect passes each accessor of w to UpsertEntry on the corresponding
+// package-level gauge. The gauges must have been registered beforehand.
+func (w *pgxpoolStatsWrapper) connect() {
+	pgxpoolAcquireCount.UpsertEntry(w.acquireCount)
+	pgxpoolAcquireDurationSeconds.UpsertEntry(w.acquireDurationSeconds)
+	pgxpoolAcquiredConns.UpsertEntry(w.acquiredConns)
+	pgxpoolCanceledAcquireCount.UpsertEntry(w.canceledAcquireCount)
+	pgxpoolConstructingConns.UpsertEntry(w.constructingConns)
+	pgxpoolEmptyAcquireCount.UpsertEntry(w.emptyAcquireCount)
+	pgxpoolIdleConns.UpsertEntry(w.idleConns)
+	pgxpoolMaxConns.UpsertEntry(w.maxConns)
+	pgxpoolMaxIdleDestroyCount.UpsertEntry(w.maxIdleDestroyCount)
+	pgxpoolMaxLifetimeDestroyCount.UpsertEntry(w.maxLifetimeDestroyCount)
+	pgxpoolNewConnsCount.UpsertEntry(w.newConnsCount)
+}
diff --git a/internal/telemetry/metrics/pgxpool_test.go b/internal/telemetry/metrics/pgxpool_test.go
new file mode 100644
index 000000000..465b0f2d7
--- /dev/null
+++ b/internal/telemetry/metrics/pgxpool_test.go
@@ -0,0 +1,47 @@
+package metrics
+
+import (
+	"context"
+	"testing"
+
+	"github.com/jackc/pgx/v5/pgxpool"
+	"github.com/stretchr/testify/require"
+	"go.opencensus.io/metric/metricdata"
+
+	"github.com/pomerium/pomerium/pkg/metrics"
+)
+
+func TestPgxpoolStatMetrics(t *testing.T) {
+	registry = newMetricRegistry()
+	registerPgxpoolStatMetrics(registry.registry)
+
+	config, err := pgxpool.ParseConfig("pool_max_conns=42")
+	require.NoError(t, err)
+	pool, err := pgxpool.NewWithConfig(context.Background(), config)
+	require.NoError(t, err)
+
+	ConnectPgxpoolStatMetrics(pool)
+
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolAcquireCount)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		float64(0), metrics.PgxpoolAcquireDurationSeconds)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolAcquiredConns)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolCanceledAcquireCount)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolConstructingConns)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolEmptyAcquireCount)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolIdleConns)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(42), metrics.PgxpoolMaxConns)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolMaxIdleDestroyCount)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolMaxLifetimeDestroyCount)
+	testMetricRetrieval(registry.registry.Read(), t, []metricdata.LabelValue{},
+		int64(0), metrics.PgxpoolNewConnsCount)
+}
diff --git a/internal/telemetry/metrics/registry.go b/internal/telemetry/metrics/registry.go
index 74ffd747c..4487b2095 100644
--- a/internal/telemetry/metrics/registry.go
+++ b/internal/telemetry/metrics/registry.go
@@ -75,6 +75,8 @@ func (r *metricRegistry) init() {
 			if err != nil {
 				log.Ctx(ctx).Error().Err(err).Msg("telemetry/metrics: failed to register autocert metrics")
 			}
+
+			registerPgxpoolStatMetrics(r.registry)
 		})
 }
 
diff --git a/pkg/metrics/constants.go b/pkg/metrics/constants.go
index abcab6f96..44fa2528c 100644
--- a/pkg/metrics/constants.go
+++ b/pkg/metrics/constants.go
@@ -63,6 +63,43 @@ const (
 	ConfigDBErrors = "config_db_errors"
 	// ConfigDBErrorsHelp is the help text for ConfigDBErrors.
 	ConfigDBErrorsHelp = "amount of errors observed while applying databroker config; -1 if validation failed and was rejected altogether"
+
+	// PgxpoolAcquireCount is the cumulative count of successful acquires from
+	// the current database connection pool.
+	PgxpoolAcquireCount = "pgxpool_acquire_count"
+	// PgxpoolAcquireDurationSeconds is the total duration of all successful
+	// acquires from the current database connection pool.
+	PgxpoolAcquireDurationSeconds = "pgxpool_acquire_duration_seconds"
+	// PgxpoolAcquiredConns is the number of currently acquired connections in
+	// the current database connection pool.
+	PgxpoolAcquiredConns = "pgxpool_acquired_conns"
+	// PgxpoolCanceledAcquireCount is the cumulative count of acquires from the
+	// current database connection pool that were canceled by a context.
+	PgxpoolCanceledAcquireCount = "pgxpool_canceled_acquire_count"
+	// PgxpoolConstructingConns is the number of conns with construction in
+	// progress in the current database connection pool.
+	PgxpoolConstructingConns = "pgxpool_constructing_conns"
+	// PgxpoolEmptyAcquireCount is the cumulative count of successful acquires
+	// from the current database connection pool that waited for a resource to
+	// be released or constructed because the pool was empty.
+	PgxpoolEmptyAcquireCount = "pgxpool_empty_acquire_count"
+	// PgxpoolIdleConns is the number of currently idle conns in the current
+	// database connection pool.
+	PgxpoolIdleConns = "pgxpool_idle_conns"
+	// PgxpoolMaxConns is the maximum size of the current database connection
+	// pool.
+	PgxpoolMaxConns = "pgxpool_max_conns"
+	// PgxpoolMaxIdleDestroyCount is the cumulative count of connections
+	// destroyed by the current database connection pool because they exceeded
+	// the MaxConnIdleTime.
+	PgxpoolMaxIdleDestroyCount = "pgxpool_max_idle_destroy_count"
+	// PgxpoolMaxLifetimeDestroyCount is the cumulative count of connections
+	// destroyed by the current database connection pool because they exceeded
+	// the MaxConnLifetime.
+	PgxpoolMaxLifetimeDestroyCount = "pgxpool_max_lifetime_destroy_count"
+	// PgxpoolNewConnsCount is the cumulative count of new connections opened
+	// by the current database connection pool.
+	PgxpoolNewConnsCount = "pgxpool_new_conns_count"
 )
 
 // labels
diff --git a/pkg/storage/postgres/backend.go b/pkg/storage/postgres/backend.go
index 5a8123375..3f81cf6bb 100644
--- a/pkg/storage/postgres/backend.go
+++ b/pkg/storage/postgres/backend.go
@@ -16,6 +16,7 @@ import (
 
 	"github.com/pomerium/pomerium/internal/log"
 	"github.com/pomerium/pomerium/internal/signal"
+	"github.com/pomerium/pomerium/internal/telemetry/metrics"
 	"github.com/pomerium/pomerium/pkg/contextutil"
 	"github.com/pomerium/pomerium/pkg/grpc/databroker"
 	"github.com/pomerium/pomerium/pkg/health"
@@ -383,6 +384,7 @@ func (backend *Backend) init(ctx context.Context) (serverVersion uint64, pool *p
 
 	backend.serverVersion = serverVersion
 	backend.pool = pool
+	metrics.ConnectPgxpoolStatMetrics(pool)
 	return serverVersion, pool, nil
 }
 