use build_info as liveness gauge metric (#1940)

This commit is contained in:
wasaga 2021-02-24 10:57:31 -05:00 committed by GitHub
parent cdcb65b77c
commit de55199a70
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 65 additions and 17 deletions

View file

@ -2,6 +2,7 @@ package config
import ( import (
"net/http" "net/http"
"os"
"sync" "sync"
"github.com/pomerium/pomerium/internal/httputil" "github.com/pomerium/pomerium/internal/httputil"
@ -57,7 +58,13 @@ func (mgr *MetricsManager) updateInfo(cfg *Config) {
return return
} }
metrics.SetBuildInfo(serviceName) hostname, err := os.Hostname()
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to get OS hostname")
hostname = "__unknown__"
}
metrics.SetBuildInfo(serviceName, hostname)
mgr.serviceName = serviceName mgr.serviceName = serviceName
} }

View file

@ -8,6 +8,8 @@ import (
) )
func testMetricRetrieval(metrics []*metricdata.Metric, t *testing.T, labels []metricdata.LabelValue, value interface{}, name string) { func testMetricRetrieval(metrics []*metricdata.Metric, t *testing.T, labels []metricdata.LabelValue, value interface{}, name string) {
t.Helper()
switch value.(type) { switch value.(type) {
case int64: case int64:
case float64: case float64:

View file

@ -10,6 +10,7 @@ import (
"go.opencensus.io/tag" "go.opencensus.io/tag"
"github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/pkg/metrics"
) )
var ( var (
@ -22,15 +23,15 @@ var (
} }
configLastReload = stats.Int64( configLastReload = stats.Int64(
"config_last_reload_success_timestamp", metrics.ConfigLastReloadTimestampSeconds,
"Timestamp of last successful config reload", "Timestamp of last successful config reload",
"seconds") "seconds")
configLastReloadSuccess = stats.Int64( configLastReloadSuccess = stats.Int64(
"config_last_reload_success", metrics.ConfigLastReloadSuccess,
"Returns 1 if last reload was successful", "Returns 1 if last reload was successful",
"1") "1")
identityManagerLastRefresh = stats.Int64( identityManagerLastRefresh = stats.Int64(
"identity_manager_last_refresh_timestamp", metrics.IdentityManagerLastRefreshTimestamp,
"Timestamp of last directory refresh", "Timestamp of last directory refresh",
"seconds", "seconds",
) )
@ -97,8 +98,8 @@ func SetConfigInfo(service string, success bool) {
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to // SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
// have this exported // have this exported
func SetBuildInfo(service string) { func SetBuildInfo(service, hostname string) {
registry.setBuildInfo(service) registry.setBuildInfo(service, hostname)
} }
// RegisterInfoMetrics registers non-view based metrics registry globally for export // RegisterInfoMetrics registers non-view based metrics registry globally for export

View file

@ -5,6 +5,7 @@ import (
"testing" "testing"
"github.com/pomerium/pomerium/internal/version" "github.com/pomerium/pomerium/internal/version"
"github.com/pomerium/pomerium/pkg/metrics"
"go.opencensus.io/metric/metricdata" "go.opencensus.io/metric/metricdata"
"go.opencensus.io/metric/metricproducer" "go.opencensus.io/metric/metricproducer"
@ -45,10 +46,11 @@ func Test_SetBuildInfo(t *testing.T) {
{Value: version.FullVersion(), Present: true}, {Value: version.FullVersion(), Present: true},
{Value: version.GitCommit, Present: true}, {Value: version.GitCommit, Present: true},
{Value: runtime.Version(), Present: true}, {Value: runtime.Version(), Present: true},
{Value: "test_host", Present: true},
} }
SetBuildInfo("test_service") SetBuildInfo("test_service", "test_host")
testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), "build_info") testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), metrics.BuildInfo)
} }
func Test_AddPolicyCountCallback(t *testing.T) { func Test_AddPolicyCountCallback(t *testing.T) {
@ -58,7 +60,7 @@ func Test_AddPolicyCountCallback(t *testing.T) {
wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}} wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}}
AddPolicyCountCallback("test_service", func() int64 { return wantValue }) AddPolicyCountCallback("test_service", func() int64 { return wantValue })
testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, "policy_count_total") testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, metrics.PolicyCountTotal)
} }
func Test_SetConfigChecksum(t *testing.T) { func Test_SetConfigChecksum(t *testing.T) {
@ -68,7 +70,7 @@ func Test_SetConfigChecksum(t *testing.T) {
wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}} wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}}
SetConfigChecksum("test_service", wantValue) SetConfigChecksum("test_service", wantValue)
testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), "config_checksum_decimal") testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), metrics.ConfigChecksumDecimal)
} }
func Test_RegisterInfoMetrics(t *testing.T) { func Test_RegisterInfoMetrics(t *testing.T) {

View file

@ -9,6 +9,7 @@ import (
"github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/version" "github.com/pomerium/pomerium/internal/version"
"github.com/pomerium/pomerium/pkg/metrics"
) )
var registry = newMetricRegistry() var registry = newMetricRegistry()
@ -37,25 +38,31 @@ func (r *metricRegistry) init() {
func() { func() {
r.registry = metric.NewRegistry() r.registry = metric.NewRegistry()
var err error var err error
r.buildInfo, err = r.registry.AddInt64Gauge("build_info", r.buildInfo, err = r.registry.AddInt64Gauge(metrics.BuildInfo,
metric.WithDescription("Build Metadata"), metric.WithDescription("Build Metadata"),
metric.WithLabelKeys("service", "version", "revision", "goversion"), metric.WithLabelKeys(
metrics.ServiceLabel,
metrics.VersionLabel,
metrics.RevisionLabel,
metrics.GoVersionLabel,
metrics.HostLabel,
),
) )
if err != nil { if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric") log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric")
} }
r.configChecksum, err = r.registry.AddFloat64Gauge("config_checksum_decimal", r.configChecksum, err = r.registry.AddFloat64Gauge(metrics.ConfigChecksumDecimal,
metric.WithDescription("Config checksum represented in decimal notation"), metric.WithDescription("Config checksum represented in decimal notation"),
metric.WithLabelKeys("service"), metric.WithLabelKeys(metrics.ServiceLabel),
) )
if err != nil { if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric") log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric")
} }
r.policyCount, err = r.registry.AddInt64DerivedGauge("policy_count_total", r.policyCount, err = r.registry.AddInt64DerivedGauge(metrics.PolicyCountTotal,
metric.WithDescription("Total number of policies loaded"), metric.WithDescription("Total number of policies loaded"),
metric.WithLabelKeys("service"), metric.WithLabelKeys(metrics.ServiceLabel),
) )
if err != nil { if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric") log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric")
@ -65,7 +72,7 @@ func (r *metricRegistry) init() {
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to // SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
// have this exported // have this exported
func (r *metricRegistry) setBuildInfo(service string) { func (r *metricRegistry) setBuildInfo(service, hostname string) {
if registry.buildInfo == nil { if registry.buildInfo == nil {
return return
} }
@ -74,6 +81,7 @@ func (r *metricRegistry) setBuildInfo(service string) {
metricdata.NewLabelValue(version.FullVersion()), metricdata.NewLabelValue(version.FullVersion()),
metricdata.NewLabelValue(version.GitCommit), metricdata.NewLabelValue(version.GitCommit),
metricdata.NewLabelValue((runtime.Version())), metricdata.NewLabelValue((runtime.Version())),
metricdata.NewLabelValue(hostname),
) )
if err != nil { if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric") log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric")

28
pkg/metrics/constants.go Normal file
View file

@ -0,0 +1,28 @@
// Package metrics declares the names of the metrics and labels that pomerium
// exposes, as constants that can be referenced from other projects.
package metrics
// Metric names exposed by pomerium.
const (
// ConfigLastReloadTimestampSeconds is the name of the metric holding the unix
// timestamp, in seconds, of the last successful configuration reload.
ConfigLastReloadTimestampSeconds = "config_last_reload_success_timestamp"
// ConfigLastReloadSuccess is the name of the metric that is set to 1 if the
// last configuration reload was successful.
ConfigLastReloadSuccess = "config_last_reload_success"
// IdentityManagerLastRefreshTimestamp is the name of the metric holding the
// timestamp, in seconds, of the last identity provider (IdP) directory refresh.
IdentityManagerLastRefreshTimestamp = "identity_manager_last_refresh_timestamp"
// BuildInfo is the name of a gauge carrying build metadata (version, revision,
// Go version) as labels; it may also be used to detect whether a component is live.
BuildInfo = "build_info"
// PolicyCountTotal is the name of the gauge reporting the total number of
// policies (routes) currently loaded.
PolicyCountTotal = "policy_count_total"
// ConfigChecksumDecimal is the name of the gauge holding the config checksum in
// decimal notation. It should only be used to compare config on a single node;
// it will be different in a multi-node environment.
ConfigChecksumDecimal = "config_checksum_decimal"
)
// Label keys attached to pomerium metrics.
const (
// ServiceLabel is the key of the label that carries the service name.
ServiceLabel = "service"
// VersionLabel is the key of the build info label that carries the full
// pomerium version.
VersionLabel = "version"
// RevisionLabel is the key of the build info label that carries the git
// commit the binary was built from.
RevisionLabel = "revision"
// GoVersionLabel is the key of the build info label that carries the Go
// runtime version.
GoVersionLabel = "goversion"
// HostLabel is the key of the build info label that carries the host name
// of the reporting instance.
HostLabel = "host"
)