use build_info as liveness gauge metric (#1940)

This commit is contained in:
wasaga 2021-02-24 10:57:31 -05:00 committed by GitHub
parent cdcb65b77c
commit de55199a70
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 65 additions and 17 deletions

View file

@ -2,6 +2,7 @@ package config
import (
"net/http"
"os"
"sync"
"github.com/pomerium/pomerium/internal/httputil"
@ -57,7 +58,13 @@ func (mgr *MetricsManager) updateInfo(cfg *Config) {
return
}
metrics.SetBuildInfo(serviceName)
hostname, err := os.Hostname()
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to get OS hostname")
hostname = "__unknown__"
}
metrics.SetBuildInfo(serviceName, hostname)
mgr.serviceName = serviceName
}

View file

@ -8,6 +8,8 @@ import (
)
func testMetricRetrieval(metrics []*metricdata.Metric, t *testing.T, labels []metricdata.LabelValue, value interface{}, name string) {
t.Helper()
switch value.(type) {
case int64:
case float64:

View file

@ -10,6 +10,7 @@ import (
"go.opencensus.io/tag"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/pkg/metrics"
)
var (
@ -22,15 +23,15 @@ var (
}
configLastReload = stats.Int64(
"config_last_reload_success_timestamp",
metrics.ConfigLastReloadTimestampSeconds,
"Timestamp of last successful config reload",
"seconds")
configLastReloadSuccess = stats.Int64(
"config_last_reload_success",
metrics.ConfigLastReloadSuccess,
"Returns 1 if last reload was successful",
"1")
identityManagerLastRefresh = stats.Int64(
"identity_manager_last_refresh_timestamp",
metrics.IdentityManagerLastRefreshTimestamp,
"Timestamp of last directory refresh",
"seconds",
)
@ -97,8 +98,8 @@ func SetConfigInfo(service string, success bool) {
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
// have this exported
func SetBuildInfo(service string) {
registry.setBuildInfo(service)
func SetBuildInfo(service, hostname string) {
registry.setBuildInfo(service, hostname)
}
// RegisterInfoMetrics registers non-view based metrics registry globally for export

View file

@ -5,6 +5,7 @@ import (
"testing"
"github.com/pomerium/pomerium/internal/version"
"github.com/pomerium/pomerium/pkg/metrics"
"go.opencensus.io/metric/metricdata"
"go.opencensus.io/metric/metricproducer"
@ -45,10 +46,11 @@ func Test_SetBuildInfo(t *testing.T) {
{Value: version.FullVersion(), Present: true},
{Value: version.GitCommit, Present: true},
{Value: runtime.Version(), Present: true},
{Value: "test_host", Present: true},
}
SetBuildInfo("test_service")
testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), "build_info")
SetBuildInfo("test_service", "test_host")
testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), metrics.BuildInfo)
}
func Test_AddPolicyCountCallback(t *testing.T) {
@ -58,7 +60,7 @@ func Test_AddPolicyCountCallback(t *testing.T) {
wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}}
AddPolicyCountCallback("test_service", func() int64 { return wantValue })
testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, "policy_count_total")
testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, metrics.PolicyCountTotal)
}
func Test_SetConfigChecksum(t *testing.T) {
@ -68,7 +70,7 @@ func Test_SetConfigChecksum(t *testing.T) {
wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}}
SetConfigChecksum("test_service", wantValue)
testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), "config_checksum_decimal")
testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), metrics.ConfigChecksumDecimal)
}
func Test_RegisterInfoMetrics(t *testing.T) {

View file

@ -9,6 +9,7 @@ import (
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/version"
"github.com/pomerium/pomerium/pkg/metrics"
)
var registry = newMetricRegistry()
@ -37,25 +38,31 @@ func (r *metricRegistry) init() {
func() {
r.registry = metric.NewRegistry()
var err error
r.buildInfo, err = r.registry.AddInt64Gauge("build_info",
r.buildInfo, err = r.registry.AddInt64Gauge(metrics.BuildInfo,
metric.WithDescription("Build Metadata"),
metric.WithLabelKeys("service", "version", "revision", "goversion"),
metric.WithLabelKeys(
metrics.ServiceLabel,
metrics.VersionLabel,
metrics.RevisionLabel,
metrics.GoVersionLabel,
metrics.HostLabel,
),
)
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric")
}
r.configChecksum, err = r.registry.AddFloat64Gauge("config_checksum_decimal",
r.configChecksum, err = r.registry.AddFloat64Gauge(metrics.ConfigChecksumDecimal,
metric.WithDescription("Config checksum represented in decimal notation"),
metric.WithLabelKeys("service"),
metric.WithLabelKeys(metrics.ServiceLabel),
)
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric")
}
r.policyCount, err = r.registry.AddInt64DerivedGauge("policy_count_total",
r.policyCount, err = r.registry.AddInt64DerivedGauge(metrics.PolicyCountTotal,
metric.WithDescription("Total number of policies loaded"),
metric.WithLabelKeys("service"),
metric.WithLabelKeys(metrics.ServiceLabel),
)
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric")
@ -65,7 +72,7 @@ func (r *metricRegistry) init() {
// SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to
// have this exported
func (r *metricRegistry) setBuildInfo(service string) {
func (r *metricRegistry) setBuildInfo(service, hostname string) {
if registry.buildInfo == nil {
return
}
@ -74,6 +81,7 @@ func (r *metricRegistry) setBuildInfo(service string) {
metricdata.NewLabelValue(version.FullVersion()),
metricdata.NewLabelValue(version.GitCommit),
metricdata.NewLabelValue((runtime.Version())),
metricdata.NewLabelValue(hostname),
)
if err != nil {
log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric")

28
pkg/metrics/constants.go Normal file
View file

@ -0,0 +1,28 @@
// Package metrics declares the names of the metrics and labels that pomerium
// exposes, as constants that other projects can refer to.
package metrics
// Metric names.
const (
// ConfigLastReloadTimestampSeconds is the name of the metric holding the Unix
// timestamp, in seconds, of the last successful configuration reload.
ConfigLastReloadTimestampSeconds = "config_last_reload_success_timestamp"
// ConfigLastReloadSuccess is the name of the metric that is set to 1 if the
// last configuration reload was successful.
ConfigLastReloadSuccess = "config_last_reload_success"
// IdentityManagerLastRefreshTimestamp is the name of the metric holding the
// timestamp, in seconds, of the last identity provider (IdP) directory refresh.
IdentityManagerLastRefreshTimestamp = "identity_manager_last_refresh_timestamp"
// BuildInfo is the name of a gauge that may be used to detect whether a
// component is live; its labels also carry the build metadata (version,
// revision, Go version) and the host.
BuildInfo = "build_info"
// PolicyCountTotal is the name of the metric holding the total number of
// policies (routes) currently loaded.
PolicyCountTotal = "policy_count_total"
// ConfigChecksumDecimal is the name of the metric holding the config checksum
// in decimal notation. It should only be used to compare configuration on a
// single node; the value will be different across nodes in a multi-node
// environment.
ConfigChecksumDecimal = "config_checksum_decimal"
)
// Label keys attached to the exported metrics.
const (
// ServiceLabel is the label key for the name of the pomerium service
// reporting the metric.
ServiceLabel = "service"
// VersionLabel is the label key for the full pomerium version.
VersionLabel = "version"
// RevisionLabel is the label key for the git commit the binary was built from.
RevisionLabel = "revision"
// GoVersionLabel is the label key for the Go runtime version.
GoVersionLabel = "goversion"
// HostLabel is the label key for the hostname of the reporting instance.
HostLabel = "host"
)