mirror of
https://github.com/pomerium/pomerium.git
synced 2025-08-06 02:09:15 +02:00
Add storage metrics (#554)
* Add cache storage metrics - autocache client metrics - autocache server metrics - boltdb metrics - redis client metrics - refactor metrics registry to be general purpose
This commit is contained in:
parent
acfc880421
commit
cc504362e4
8 changed files with 400 additions and 108 deletions
|
@ -325,7 +325,35 @@ Expose a prometheus format HTTP endpoint on the specified port. Disabled by defa
|
||||||
**Metrics tracked**
|
**Metrics tracked**
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| :-------------------------------------------- | :-------- | :---------------------------------------------------------------------- |
|
| --------------------------------------------- | --------- | ----------------------------------------------------------------------- |
|
||||||
|
| boltdb_free_alloc_size_bytes | Gauge | Bytes allocated in free pages |
|
||||||
|
| boltdb_free_page_n | Gauge | Number of free pages on the freelist |
|
||||||
|
| boltdb_freelist_inuse_size_bytes | Gauge | Bytes used by the freelist |
|
||||||
|
| boltdb_open_txn | Gauge | number of currently open read transactions |
|
||||||
|
| boltdb_pending_page_n | Gauge | Number of pending pages on the freelist |
|
||||||
|
| boltdb_txn | Gauge | total number of started read transactions |
|
||||||
|
| boltdb_txn_cursor_total | Counter | Total number of cursors created |
|
||||||
|
| boltdb_txn_node_deref_total | Counter | Total number of node dereferences |
|
||||||
|
| boltdb_txn_node_total | Counter | Total number of node allocations |
|
||||||
|
| boltdb_txn_page_alloc_size_bytes_total | Counter | Total bytes allocated |
|
||||||
|
| boltdb_txn_page_total | Counter | Total number of page allocations |
|
||||||
|
| boltdb_txn_rebalance_duration_ms_total | Counter | Total time spent rebalancing |
|
||||||
|
| boltdb_txn_rebalance_total | Counter | Total number of node rebalances |
|
||||||
|
| boltdb_txn_spill_duration_ms_total | Counter | Total time spent spilling |
|
||||||
|
| boltdb_txn_spill_total | Counter | Total number of nodes spilled |
|
||||||
|
| boltdb_txn_split_total | Counter | Total number of nodes split |
|
||||||
|
| boltdb_txn_write_duration_ms_total | Counter | Total time spent writing to disk |
|
||||||
|
| boltdb_txn_write_total | Counter | Total number of writes performed |
|
||||||
|
| groupcache_cache_hits_total | Counter | Total cache hits in local or cluster cache |
|
||||||
|
| groupcache_cache_hits_total | Counter | Total cache hits in local or cluster cache |
|
||||||
|
| groupcache_gets_total                         | Counter   | Total get requests, including from peers                                 |
|
||||||
|
| groupcache_loads_deduped_total | Counter | gets without cache hits after duplicate suppression |
|
||||||
|
| groupcache_loads_total | Counter | Total gets without cache hits |
|
||||||
|
| groupcache_local_load_errs_total | Counter | Total local load errors |
|
||||||
|
| groupcache_local_loads_total | Counter | Total good local loads |
|
||||||
|
| groupcache_peer_errors_total | Counter | Total errors from peers |
|
||||||
|
| groupcache_peer_loads_total | Counter | Total remote loads or cache hits without error |
|
||||||
|
| groupcache_server_requests_total | Counter | Total gets from peers |
|
||||||
| grpc_client_request_duration_ms | Histogram | GRPC client request duration by service |
|
| grpc_client_request_duration_ms | Histogram | GRPC client request duration by service |
|
||||||
| grpc_client_request_size_bytes | Histogram | GRPC client request size by service |
|
| grpc_client_request_size_bytes | Histogram | GRPC client request size by service |
|
||||||
| grpc_client_requests_total | Counter | Total GRPC client requests made by service |
|
| grpc_client_requests_total | Counter | Total GRPC client requests made by service |
|
||||||
|
@ -342,10 +370,17 @@ Expose a prometheus format HTTP endpoint on the specified port. Disabled by defa
|
||||||
| http_server_request_size_bytes | Histogram | HTTP server request size by service |
|
| http_server_request_size_bytes | Histogram | HTTP server request size by service |
|
||||||
| http_server_requests_total | Counter | Total HTTP server requests handled by service |
|
| http_server_requests_total | Counter | Total HTTP server requests handled by service |
|
||||||
| http_server_response_size_bytes | Histogram | HTTP server response size by service |
|
| http_server_response_size_bytes | Histogram | HTTP server response size by service |
|
||||||
|
| pomerium_build_info | Gauge | Pomerium build metadata by git revision, service, version and goversion |
|
||||||
| pomerium_config_checksum_int64 | Gauge | Currently loaded configuration checksum by service |
|
| pomerium_config_checksum_int64 | Gauge | Currently loaded configuration checksum by service |
|
||||||
| pomerium_config_last_reload_success | Gauge | Whether the last configuration reload succeeded by service |
|
| pomerium_config_last_reload_success | Gauge | Whether the last configuration reload succeeded by service |
|
||||||
| pomerium_config_last_reload_success_timestamp | Gauge | The timestamp of the last successful configuration reload by service |
|
| pomerium_config_last_reload_success_timestamp | Gauge | The timestamp of the last successful configuration reload by service |
|
||||||
| pomerium_build_info | Gauge | Pomerium build metadata by git revision, service, version and goversion |
|
| redis_conns | Gauge | Number of total connections in the pool |
|
||||||
|
| redis_hits_total | Counter | Total number of times free connection was found in the pool |
|
||||||
|
| redis_idle_conns | Gauge | Number of idle connections in the pool |
|
||||||
|
| redis_misses_total | Counter | Total number of times free connection was NOT found in the pool |
|
||||||
|
| redis_stale_conns_total | Counter | Total number of stale connections removed from the pool |
|
||||||
|
| redis_timeouts_total | Counter | Total number of times a wait timeout occurred |
|
||||||
|
|
||||||
|
|
||||||
### Tracing
|
### Tracing
|
||||||
|
|
||||||
|
@ -357,8 +392,8 @@ Each unit work is called a Span in a trace. Spans include metadata about the wor
|
||||||
|
|
||||||
| Config Key | Description | Required |
|
| Config Key | Description | Required |
|
||||||
| :--------------- | :---------------------------------------------------------------- | -------- |
|
| :--------------- | :---------------------------------------------------------------- | -------- |
|
||||||
| tracing_provider | The name of the tracing provider. (e.g. jaeger) | ✅ |
|
| tracing_provider | The name of the tracing provider. (e.g. jaeger) | ✅ |
|
||||||
| tracing_debug | Will disable [sampling](https://opencensus.io/tracing/sampling/). | ❌ |
|
| tracing_debug | Will disable [sampling](https://opencensus.io/tracing/sampling/). | ❌ |
|
||||||
|
|
||||||
#### Jaeger
|
#### Jaeger
|
||||||
|
|
||||||
|
@ -372,8 +407,8 @@ Each unit work is called a Span in a trace. Spans include metadata about the wor
|
||||||
|
|
||||||
| Config Key | Description | Required |
|
| Config Key | Description | Required |
|
||||||
| :-------------------------------- | :------------------------------------------ | -------- |
|
| :-------------------------------- | :------------------------------------------ | -------- |
|
||||||
| tracing_jaeger_collector_endpoint | Url to the Jaeger HTTP Thrift collector. | ✅ |
|
| tracing_jaeger_collector_endpoint | Url to the Jaeger HTTP Thrift collector. | ✅ |
|
||||||
| tracing_jaeger_agent_endpoint | Send spans to jaeger-agent at this address. | ✅ |
|
| tracing_jaeger_agent_endpoint | Send spans to jaeger-agent at this address. | ✅ |
|
||||||
|
|
||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
|
|
|
@ -12,9 +12,11 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/golang/groupcache"
|
"github.com/golang/groupcache"
|
||||||
|
|
||||||
"github.com/pomerium/autocache"
|
"github.com/pomerium/autocache"
|
||||||
"github.com/pomerium/pomerium/internal/httputil"
|
"github.com/pomerium/pomerium/internal/httputil"
|
||||||
"github.com/pomerium/pomerium/internal/kv"
|
"github.com/pomerium/pomerium/internal/kv"
|
||||||
|
"github.com/pomerium/pomerium/internal/telemetry/metrics"
|
||||||
"github.com/pomerium/pomerium/internal/urlutil"
|
"github.com/pomerium/pomerium/internal/urlutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -116,13 +118,14 @@ func New(o *Options) (*Store, error) {
|
||||||
}
|
}
|
||||||
serverOpts := &httputil.ServerOptions{Addr: o.Addr}
|
serverOpts := &httputil.ServerOptions{Addr: o.Addr}
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
s.srv, err = httputil.NewServer(serverOpts, QueryParamToCtx(s.cluster), &wg)
|
s.srv, err = httputil.NewServer(serverOpts, metrics.HTTPMetricsHandler("groupcache")(QueryParamToCtx(s.cluster)), &wg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if _, err := s.cluster.Join([]string{o.ClusterDomain}); err != nil {
|
if _, err := s.cluster.Join([]string{o.ClusterDomain}); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
metrics.AddGroupCacheMetrics(s.db)
|
||||||
return &s, nil
|
return &s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -191,7 +194,9 @@ func (s signedSession) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||||
q.Set(defaultQueryParamKey, session)
|
q.Set(defaultQueryParamKey, session)
|
||||||
newReqURL.RawQuery = q.Encode()
|
newReqURL.RawQuery = q.Encode()
|
||||||
newReq.URL = urlutil.NewSignedURL(s.sharedKey, &newReqURL).Sign()
|
newReq.URL = urlutil.NewSignedURL(s.sharedKey, &newReqURL).Sign()
|
||||||
return http.DefaultTransport.RoundTrip(newReq)
|
|
||||||
|
tripper := metrics.HTTPMetricsRoundTripper("cache", "groupcache")(http.DefaultTransport)
|
||||||
|
return tripper.RoundTrip(newReq)
|
||||||
}
|
}
|
||||||
|
|
||||||
// QueryParamToCtx takes a value from a query param and adds it to the
|
// QueryParamToCtx takes a value from a query param and adds it to the
|
||||||
|
|
|
@ -5,9 +5,10 @@ package bolt
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
"github.com/pomerium/pomerium/internal/kv"
|
|
||||||
|
|
||||||
bolt "go.etcd.io/bbolt"
|
bolt "go.etcd.io/bbolt"
|
||||||
|
|
||||||
|
"github.com/pomerium/pomerium/internal/kv"
|
||||||
|
"github.com/pomerium/pomerium/internal/telemetry/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ kv.Store = &Store{}
|
var _ kv.Store = &Store{}
|
||||||
|
@ -64,6 +65,7 @@ func New(o *Options) (*Store, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics.AddBoltDBMetrics(db.Stats)
|
||||||
return &Store{db: db, bucket: o.Bucket}, nil
|
return &Store{db: db, bucket: o.Bucket}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,9 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
"github.com/go-redis/redis/v7"
|
"github.com/go-redis/redis/v7"
|
||||||
|
|
||||||
"github.com/pomerium/pomerium/internal/kv"
|
"github.com/pomerium/pomerium/internal/kv"
|
||||||
|
"github.com/pomerium/pomerium/internal/telemetry/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
var _ kv.Store = &Store{}
|
var _ kv.Store = &Store{}
|
||||||
|
@ -56,6 +58,7 @@ func New(o *Options) (*Store, error) {
|
||||||
return nil, fmt.Errorf("kv/redis: error connecting to redis: %w", err)
|
return nil, fmt.Errorf("kv/redis: error connecting to redis: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metrics.AddRedisMetrics(db.PoolStats)
|
||||||
return &Store{db: db}, nil
|
return &Store{db: db}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,19 +2,14 @@ package metrics
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"runtime"
|
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/pomerium/pomerium/internal/log"
|
|
||||||
"github.com/pomerium/pomerium/internal/version"
|
|
||||||
|
|
||||||
"go.opencensus.io/metric"
|
|
||||||
"go.opencensus.io/metric/metricdata"
|
|
||||||
"go.opencensus.io/metric/metricproducer"
|
"go.opencensus.io/metric/metricproducer"
|
||||||
"go.opencensus.io/stats"
|
"go.opencensus.io/stats"
|
||||||
"go.opencensus.io/stats/view"
|
"go.opencensus.io/stats/view"
|
||||||
"go.opencensus.io/tag"
|
"go.opencensus.io/tag"
|
||||||
|
|
||||||
|
"github.com/pomerium/pomerium/internal/log"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -30,7 +25,6 @@ var (
|
||||||
"config_last_reload_success",
|
"config_last_reload_success",
|
||||||
"Returns 1 if last reload was successful",
|
"Returns 1 if last reload was successful",
|
||||||
"1")
|
"1")
|
||||||
registry = newMetricRegistry()
|
|
||||||
|
|
||||||
// ConfigLastReloadView contains the timestamp the configuration was last
|
// ConfigLastReloadView contains the timestamp the configuration was last
|
||||||
// reloaded, labeled by service.
|
// reloaded, labeled by service.
|
||||||
|
@ -79,75 +73,6 @@ func SetConfigInfo(service string, success bool) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// metricRegistry holds the non-view metrics and handles safe
|
|
||||||
// initialization and updates. Behavior without using newMetricRegistry()
|
|
||||||
// is undefined.
|
|
||||||
type metricRegistry struct {
|
|
||||||
registry *metric.Registry
|
|
||||||
buildInfo *metric.Int64Gauge
|
|
||||||
policyCount *metric.Int64DerivedGauge
|
|
||||||
configChecksum *metric.Float64Gauge
|
|
||||||
sync.Once
|
|
||||||
}
|
|
||||||
|
|
||||||
func newMetricRegistry() *metricRegistry {
|
|
||||||
r := new(metricRegistry)
|
|
||||||
r.init()
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *metricRegistry) init() {
|
|
||||||
r.Do(
|
|
||||||
func() {
|
|
||||||
r.registry = metric.NewRegistry()
|
|
||||||
var err error
|
|
||||||
r.buildInfo, err = r.registry.AddInt64Gauge("build_info",
|
|
||||||
metric.WithDescription("Build Metadata"),
|
|
||||||
metric.WithLabelKeys("service", "version", "revision", "goversion"),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric")
|
|
||||||
}
|
|
||||||
|
|
||||||
r.configChecksum, err = r.registry.AddFloat64Gauge("config_checksum_decimal",
|
|
||||||
metric.WithDescription("Config checksum represented in decimal notation"),
|
|
||||||
metric.WithLabelKeys("service"),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric")
|
|
||||||
}
|
|
||||||
|
|
||||||
r.policyCount, err = r.registry.AddInt64DerivedGauge("policy_count_total",
|
|
||||||
metric.WithDescription("Total number of policies loaded"),
|
|
||||||
metric.WithLabelKeys("service"),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
|
|
||||||
// have this exported
|
|
||||||
func (r *metricRegistry) setBuildInfo(service string) {
|
|
||||||
if registry.buildInfo == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
m, err := registry.buildInfo.GetEntry(
|
|
||||||
metricdata.NewLabelValue(service),
|
|
||||||
metricdata.NewLabelValue(version.FullVersion()),
|
|
||||||
metricdata.NewLabelValue(version.GitCommit),
|
|
||||||
metricdata.NewLabelValue((runtime.Version())),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric")
|
|
||||||
}
|
|
||||||
|
|
||||||
// This sets our build_info metric to a constant 1 per
|
|
||||||
// https://www.robustperception.io/exposing-the-software-version-to-prometheus
|
|
||||||
m.Set(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
|
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
|
||||||
// have this exported
|
// have this exported
|
||||||
func SetBuildInfo(service string) {
|
func SetBuildInfo(service string) {
|
||||||
|
@ -159,33 +84,12 @@ func RegisterInfoMetrics() {
|
||||||
metricproducer.GlobalManager().AddProducer(registry.registry)
|
metricproducer.GlobalManager().AddProducer(registry.registry)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *metricRegistry) setConfigChecksum(service string, checksum uint64) {
|
|
||||||
if r.configChecksum == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
m, err := r.configChecksum.GetEntry(metricdata.NewLabelValue(service))
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to get config checksum metric")
|
|
||||||
}
|
|
||||||
m.Set(float64(checksum))
|
|
||||||
}
|
|
||||||
|
|
||||||
// SetConfigChecksum creates the configuration checksum metric. You must call RegisterInfoMetrics to
|
// SetConfigChecksum creates the configuration checksum metric. You must call RegisterInfoMetrics to
|
||||||
// have this exported
|
// have this exported
|
||||||
func SetConfigChecksum(service string, checksum uint64) {
|
func SetConfigChecksum(service string, checksum uint64) {
|
||||||
registry.setConfigChecksum(service, checksum)
|
registry.setConfigChecksum(service, checksum)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *metricRegistry) addPolicyCountCallback(service string, f func() int64) {
|
|
||||||
if r.policyCount == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
err := r.policyCount.UpsertEntry(f, metricdata.NewLabelValue(service))
|
|
||||||
if err != nil {
|
|
||||||
log.Error().Err(err).Msg("telemetry/metrics: failed to get policy count metric")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AddPolicyCountCallback sets the function to call when exporting the
|
// AddPolicyCountCallback sets the function to call when exporting the
|
||||||
// policy count metric. You must call RegisterInfoMetrics to have this
|
// policy count metric. You must call RegisterInfoMetrics to have this
|
||||||
// exported
|
// exported
|
||||||
|
|
107
internal/telemetry/metrics/kv.go
Normal file
107
internal/telemetry/metrics/kv.go
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/go-redis/redis/v7"
|
||||||
|
"github.com/golang/groupcache"
|
||||||
|
"go.etcd.io/bbolt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AddGroupCacheMetrics registers a metrics handler against a *groupcache.Group
|
||||||
|
func AddGroupCacheMetrics(gc *groupcache.Group) {
|
||||||
|
|
||||||
|
cumulativeMetrics := []struct {
|
||||||
|
name string
|
||||||
|
desc string
|
||||||
|
f func() int64
|
||||||
|
}{
|
||||||
|
{"groupcache_gets_total", "Total get request, including from peers", gc.Stats.Gets.Get},
|
||||||
|
{"groupcache_cache_hits_total", "Total cache hits in local or cluster cache", gc.Stats.CacheHits.Get},
|
||||||
|
{"groupcache_cache_hits_total", "Total cache hits in local or cluster cache", gc.Stats.CacheHits.Get},
|
||||||
|
{"groupcache_peer_loads_total", "Total remote loads or cache hits without error", gc.Stats.PeerLoads.Get},
|
||||||
|
{"groupcache_peer_errors_total", "Total errors from peers", gc.Stats.PeerErrors.Get},
|
||||||
|
{"groupcache_loads_total", "Total gets without cache hits", gc.Stats.Loads.Get},
|
||||||
|
{"groupcache_loads_deduped_total", "gets without cache hits after duplicate suppression", gc.Stats.LoadsDeduped.Get},
|
||||||
|
{"groupcache_local_loads_total", "Total good local loads", gc.Stats.LocalLoads.Get},
|
||||||
|
{"groupcache_local_load_errs_total", "Total local load errors", gc.Stats.LocalLoadErrs.Get},
|
||||||
|
{"groupcache_server_requests_total", "Total gets from peers", gc.Stats.ServerRequests.Get},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range cumulativeMetrics {
|
||||||
|
registry.addInt64DerivedCumulativeMetric(m.name, m.desc, "autocache", m.f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddBoltDBMetrics registers a metrics handler against a *bbolt.DB
|
||||||
|
func AddBoltDBMetrics(stats func() bbolt.Stats) {
|
||||||
|
gaugeMetrics := []struct {
|
||||||
|
name string
|
||||||
|
desc string
|
||||||
|
f func() int64
|
||||||
|
}{
|
||||||
|
{"boltdb_free_page_n", "Number of free pages on the freelist", func() int64 { return int64(stats().FreePageN) }},
|
||||||
|
{"boltdb_pending_page_n", "Number of pending pages on the freelist", func() int64 { return int64(stats().PendingPageN) }},
|
||||||
|
{"boltdb_free_alloc_size_bytes", "Bytes allocated in free pages", func() int64 { return int64(stats().FreeAlloc) }},
|
||||||
|
{"boltdb_freelist_inuse_size_bytes", "Bytes used by the freelist", func() int64 { return int64(stats().FreelistInuse) }},
|
||||||
|
{"boltdb_txn", "total number of started read transactions", func() int64 { return int64(stats().TxN) }},
|
||||||
|
{"boltdb_open_txn", "number of currently open read transactions", func() int64 { return int64(stats().OpenTxN) }},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range gaugeMetrics {
|
||||||
|
registry.addInt64DerivedGaugeMetric(m.name, m.desc, "boltdb", m.f)
|
||||||
|
}
|
||||||
|
|
||||||
|
cumulativeMetrics := []struct {
|
||||||
|
name string
|
||||||
|
desc string
|
||||||
|
f func() int64
|
||||||
|
}{
|
||||||
|
{"boltdb_txn_page_total", "Total number of page allocations", func() int64 { return int64(stats().TxStats.PageCount) }},
|
||||||
|
{"boltdb_txn_page_alloc_size_bytes_total", "Total bytes allocated", func() int64 { return int64(stats().TxStats.PageAlloc) }},
|
||||||
|
{"boltdb_txn_cursor_total", "Total number of cursors created", func() int64 { return int64(stats().TxStats.CursorCount) }},
|
||||||
|
{"boltdb_txn_node_total", "Total number of node allocations", func() int64 { return int64(stats().TxStats.NodeCount) }},
|
||||||
|
{"boltdb_txn_node_deref_total", "Total number of node dereferences", func() int64 { return int64(stats().TxStats.NodeDeref) }},
|
||||||
|
{"boltdb_txn_rebalance_total", "Total number of node rebalances", func() int64 { return int64(stats().TxStats.Rebalance) }},
|
||||||
|
{"boltdb_txn_rebalance_duration_ms_total", "Total time spent rebalancing", func() int64 { return stats().TxStats.RebalanceTime.Milliseconds() }},
|
||||||
|
{"boltdb_txn_split_total", "Total number of nodes split", func() int64 { return int64(stats().TxStats.Split) }},
|
||||||
|
{"boltdb_txn_spill_total", "Total number of nodes spilled", func() int64 { return int64(stats().TxStats.Spill) }},
|
||||||
|
{"boltdb_txn_spill_duration_ms_total", "Total time spent spilling", func() int64 { return stats().TxStats.SpillTime.Milliseconds() }},
|
||||||
|
{"boltdb_txn_write_total", "Total number of writes performed", func() int64 { return int64(stats().TxStats.Write) }},
|
||||||
|
{"boltdb_txn_write_duration_ms_total", "Total time spent writing to disk", func() int64 { return stats().TxStats.WriteTime.Milliseconds() }},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range cumulativeMetrics {
|
||||||
|
registry.addInt64DerivedCumulativeMetric(m.name, m.desc, "boltdb", m.f)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddRedisMetrics registers a metrics handler against a redis Client's PoolStats() method
|
||||||
|
func AddRedisMetrics(stats func() *redis.PoolStats) {
|
||||||
|
gaugeMetrics := []struct {
|
||||||
|
name string
|
||||||
|
desc string
|
||||||
|
f func() int64
|
||||||
|
}{
|
||||||
|
{"redis_conns", "Number of total connections in the pool", func() int64 { return int64(stats().TotalConns) }},
|
||||||
|
{"redis_idle_conns", "Number of idle connections in the pool", func() int64 { return int64(stats().IdleConns) }},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range gaugeMetrics {
|
||||||
|
registry.addInt64DerivedGaugeMetric(m.name, m.desc, "redis", m.f)
|
||||||
|
}
|
||||||
|
|
||||||
|
cumulativeMetrics := []struct {
|
||||||
|
name string
|
||||||
|
desc string
|
||||||
|
f func() int64
|
||||||
|
}{
|
||||||
|
{"redis_hits_total", "Total number of times free connection was found in the pool", func() int64 { return int64(stats().Hits) }},
|
||||||
|
{"redis_misses_total", "Total number of times free connection was NOT found in the pool", func() int64 { return int64(stats().Misses) }},
|
||||||
|
{"redis_timeouts_total", "Total number of times a wait timeout occurred", func() int64 { return int64(stats().Timeouts) }},
|
||||||
|
{"redis_stale_conns_total", "Total number of stale connections removed from the pool", func() int64 { return int64(stats().StaleConns) }},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, m := range cumulativeMetrics {
|
||||||
|
registry.addInt64DerivedCumulativeMetric(m.name, m.desc, "redis", m.f)
|
||||||
|
}
|
||||||
|
}
|
94
internal/telemetry/metrics/kv_test.go
Normal file
94
internal/telemetry/metrics/kv_test.go
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-redis/redis/v7"
|
||||||
|
"github.com/golang/groupcache"
|
||||||
|
"go.etcd.io/bbolt"
|
||||||
|
"go.opencensus.io/metric/metricdata"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Test_AddGroupCacheMetrics verifies that groupcache stats registered via
// AddGroupCacheMetrics surface through the package-level metric registry with
// the expected names, the "autocache" service label, and the expected values.
func Test_AddGroupCacheMetrics(t *testing.T) {
	t.Parallel()

	gc := &groupcache.Group{}
	AddGroupCacheMetrics(gc)

	tests := []struct {
		name string
		stat *groupcache.AtomicInt
		want int64
	}{
		{"groupcache_gets_total", &gc.Stats.Gets, 4},
		{"groupcache_loads_total", &gc.Stats.Loads, 42},
		{"groupcache_server_requests_total", &gc.Stats.ServerRequests, 8},
	}

	labelValues := []metricdata.LabelValue{
		metricdata.NewLabelValue("autocache"),
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Bump the underlying stat, then confirm the derived metric
			// reports the new value when the registry is read.
			// NOTE(review): registry is package-global and shared with the
			// other parallel tests in this file; names don't collide, but
			// confirm this stays clean under `go test -race`.
			tt.stat.Add(tt.want)
			testMetricRetrieval(registry.registry.Read(), t, labelValues, tt.want, tt.name)
		})
	}

}
|
||||||
|
|
||||||
|
// Test_AddBoltDBMetrics verifies that boltdb stats registered via
// AddBoltDBMetrics are exported with the expected names, the "boltdb" service
// label, and values derived from the supplied bbolt.Stats snapshot.
func Test_AddBoltDBMetrics(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name string
		stat bbolt.Stats
		want int64
	}{
		{"boltdb_free_page_n", bbolt.Stats{FreePageN: 14}, 14},
		{"boltdb_txn", bbolt.Stats{TxN: 88}, 88},

		// Duration-based metric: 42ms is expected to be reported as 42.
		{"boltdb_txn_rebalance_duration_ms_total", bbolt.Stats{TxStats: bbolt.TxStats{RebalanceTime: 42 * time.Millisecond}}, 42},
		{"boltdb_txn_write_total", bbolt.Stats{TxStats: bbolt.TxStats{Write: 42}}, 42},
	}

	labelValues := []metricdata.LabelValue{
		metricdata.NewLabelValue("boltdb"),
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Re-registering per subcase swaps the stats callback in the
			// shared global registry; each callback returns a fixed snapshot.
			AddBoltDBMetrics(func() bbolt.Stats { return tt.stat })
			testMetricRetrieval(registry.registry.Read(), t, labelValues, tt.want, tt.name)
		})
	}

}
|
||||||
|
|
||||||
|
// Test_AddRedisMetrics verifies that redis pool stats registered via
// AddRedisMetrics are exported with the expected names, the "redis" service
// label, and values derived from the supplied *redis.PoolStats snapshot.
func Test_AddRedisMetrics(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name string
		stat *redis.PoolStats
		want int64
	}{
		{"redis_conns", &redis.PoolStats{TotalConns: 7}, 7},
		{"redis_hits_total", &redis.PoolStats{Hits: 78}, 78},
		{"redis_timeouts_total", &redis.PoolStats{Timeouts: 2}, 2},
	}

	labelValues := []metricdata.LabelValue{
		metricdata.NewLabelValue("redis"),
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Re-registering per subcase swaps the stats callback in the
			// shared global registry; each callback returns a fixed snapshot.
			AddRedisMetrics(func() *redis.PoolStats { return tt.stat })
			testMetricRetrieval(registry.registry.Read(), t, labelValues, tt.want, tt.name)
		})
	}

}
|
142
internal/telemetry/metrics/registry.go
Normal file
142
internal/telemetry/metrics/registry.go
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"go.opencensus.io/metric"
|
||||||
|
"go.opencensus.io/metric/metricdata"
|
||||||
|
|
||||||
|
"github.com/pomerium/pomerium/internal/log"
|
||||||
|
"github.com/pomerium/pomerium/internal/version"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// registry is the package-wide metric registry; all non-view metrics in
	// this package are registered against it and exported via
	// RegisterInfoMetrics.
	registry = newMetricRegistry()
)
|
||||||
|
|
||||||
|
// metricRegistry holds the non-view metrics and handles safe
// initialization and updates. Behavior without using newMetricRegistry()
// is undefined.
type metricRegistry struct {
	// registry is the underlying opencensus registry all metrics attach to.
	registry *metric.Registry
	// buildInfo is a constant-1 gauge labeled by service, version, revision
	// and goversion.
	buildInfo *metric.Int64Gauge
	// policyCount reports the loaded policy count via a caller-supplied
	// callback (see addPolicyCountCallback).
	policyCount *metric.Int64DerivedGauge
	// configChecksum holds the loaded config checksum in decimal notation.
	configChecksum *metric.Float64Gauge
	// Once guards the one-time setup performed by init().
	sync.Once
}
|
||||||
|
|
||||||
|
func newMetricRegistry() *metricRegistry {
|
||||||
|
r := new(metricRegistry)
|
||||||
|
r.init()
|
||||||
|
return r
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *metricRegistry) init() {
|
||||||
|
r.Do(
|
||||||
|
func() {
|
||||||
|
r.registry = metric.NewRegistry()
|
||||||
|
var err error
|
||||||
|
r.buildInfo, err = r.registry.AddInt64Gauge("build_info",
|
||||||
|
metric.WithDescription("Build Metadata"),
|
||||||
|
metric.WithLabelKeys("service", "version", "revision", "goversion"),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric")
|
||||||
|
}
|
||||||
|
|
||||||
|
r.configChecksum, err = r.registry.AddFloat64Gauge("config_checksum_decimal",
|
||||||
|
metric.WithDescription("Config checksum represented in decimal notation"),
|
||||||
|
metric.WithLabelKeys("service"),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric")
|
||||||
|
}
|
||||||
|
|
||||||
|
r.policyCount, err = r.registry.AddInt64DerivedGauge("policy_count_total",
|
||||||
|
metric.WithDescription("Total number of policies loaded"),
|
||||||
|
metric.WithLabelKeys("service"),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
|
||||||
|
// have this exported
|
||||||
|
func (r *metricRegistry) setBuildInfo(service string) {
|
||||||
|
if registry.buildInfo == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m, err := registry.buildInfo.GetEntry(
|
||||||
|
metricdata.NewLabelValue(service),
|
||||||
|
metricdata.NewLabelValue(version.FullVersion()),
|
||||||
|
metricdata.NewLabelValue(version.GitCommit),
|
||||||
|
metricdata.NewLabelValue((runtime.Version())),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric")
|
||||||
|
}
|
||||||
|
|
||||||
|
// This sets our build_info metric to a constant 1 per
|
||||||
|
// https://www.robustperception.io/exposing-the-software-version-to-prometheus
|
||||||
|
m.Set(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *metricRegistry) addPolicyCountCallback(service string, f func() int64) {
|
||||||
|
if r.policyCount == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
err := r.policyCount.UpsertEntry(f, metricdata.NewLabelValue(service))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to get policy count metric")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *metricRegistry) setConfigChecksum(service string, checksum uint64) {
|
||||||
|
if r.configChecksum == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
m, err := r.configChecksum.GetEntry(metricdata.NewLabelValue(service))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Msg("telemetry/metrics: failed to get config checksum metric")
|
||||||
|
}
|
||||||
|
m.Set(float64(checksum))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *metricRegistry) addInt64DerivedGaugeMetric(name string, desc string, service string, f func() int64) {
|
||||||
|
|
||||||
|
m, err := r.registry.AddInt64DerivedGauge(name, metric.WithDescription(desc), metric.WithLabelKeys("service"))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("service", service).Msg("telemetry/metrics: failed to register metric")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = m.UpsertEntry(
|
||||||
|
f,
|
||||||
|
metricdata.NewLabelValue(service),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("service", service).Msg("telemetry/metrics: failed to update metric")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *metricRegistry) addInt64DerivedCumulativeMetric(name string, desc string, service string, f func() int64) {
|
||||||
|
|
||||||
|
m, err := r.registry.AddInt64DerivedCumulative(name, metric.WithDescription(desc), metric.WithLabelKeys("service"))
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("service", service).Msg("telemetry/metrics: failed to register metric")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = m.UpsertEntry(
|
||||||
|
f,
|
||||||
|
metricdata.NewLabelValue(service),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Err(err).Str("service", service).Msg("telemetry/metrics: failed to update metric")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue