diff --git a/config/metrics.go b/config/metrics.go index ab817a214..953e6508b 100644 --- a/config/metrics.go +++ b/config/metrics.go @@ -2,6 +2,7 @@ package config import ( "net/http" + "os" "sync" "github.com/pomerium/pomerium/internal/httputil" @@ -57,7 +58,13 @@ func (mgr *MetricsManager) updateInfo(cfg *Config) { return } - metrics.SetBuildInfo(serviceName) + hostname, err := os.Hostname() + if err != nil { + log.Error().Err(err).Msg("telemetry/metrics: failed to get OS hostname") + hostname = "__unknown__" + } + + metrics.SetBuildInfo(serviceName, hostname) mgr.serviceName = serviceName } diff --git a/internal/telemetry/metrics/helpers_test.go b/internal/telemetry/metrics/helpers_test.go index 42db9dda1..e930e8822 100644 --- a/internal/telemetry/metrics/helpers_test.go +++ b/internal/telemetry/metrics/helpers_test.go @@ -8,6 +8,8 @@ import ( ) func testMetricRetrieval(metrics []*metricdata.Metric, t *testing.T, labels []metricdata.LabelValue, value interface{}, name string) { + t.Helper() + switch value.(type) { case int64: case float64: diff --git a/internal/telemetry/metrics/info.go b/internal/telemetry/metrics/info.go index 48cf830c2..449595c3e 100644 --- a/internal/telemetry/metrics/info.go +++ b/internal/telemetry/metrics/info.go @@ -10,6 +10,7 @@ import ( "go.opencensus.io/tag" "github.com/pomerium/pomerium/internal/log" + "github.com/pomerium/pomerium/pkg/metrics" ) var ( @@ -22,15 +23,15 @@ var ( } configLastReload = stats.Int64( - "config_last_reload_success_timestamp", + metrics.ConfigLastReloadTimestampSeconds, "Timestamp of last successful config reload", "seconds") configLastReloadSuccess = stats.Int64( - "config_last_reload_success", + metrics.ConfigLastReloadSuccess, "Returns 1 if last reload was successful", "1") identityManagerLastRefresh = stats.Int64( - "identity_manager_last_refresh_timestamp", + metrics.IdentityManagerLastRefreshTimestamp, "Timestamp of last directory refresh", "seconds", ) @@ -97,8 +98,8 @@ func SetConfigInfo(service 
string, success bool) { // SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to // have this exported -func SetBuildInfo(service string) { - registry.setBuildInfo(service) +func SetBuildInfo(service, hostname string) { + registry.setBuildInfo(service, hostname) } // RegisterInfoMetrics registers non-view based metrics registry globally for export diff --git a/internal/telemetry/metrics/info_test.go b/internal/telemetry/metrics/info_test.go index a235a114e..4c68ec13c 100644 --- a/internal/telemetry/metrics/info_test.go +++ b/internal/telemetry/metrics/info_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/pomerium/pomerium/internal/version" + "github.com/pomerium/pomerium/pkg/metrics" "go.opencensus.io/metric/metricdata" "go.opencensus.io/metric/metricproducer" @@ -45,10 +46,11 @@ func Test_SetBuildInfo(t *testing.T) { {Value: version.FullVersion(), Present: true}, {Value: version.GitCommit, Present: true}, {Value: runtime.Version(), Present: true}, + {Value: "test_host", Present: true}, } - SetBuildInfo("test_service") - testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), "build_info") + SetBuildInfo("test_service", "test_host") + testMetricRetrieval(registry.registry.Read(), t, wantLabels, int64(1), metrics.BuildInfo) } func Test_AddPolicyCountCallback(t *testing.T) { @@ -58,7 +60,7 @@ func Test_AddPolicyCountCallback(t *testing.T) { wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}} AddPolicyCountCallback("test_service", func() int64 { return wantValue }) - testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, "policy_count_total") + testMetricRetrieval(registry.registry.Read(), t, wantLabels, wantValue, metrics.PolicyCountTotal) } func Test_SetConfigChecksum(t *testing.T) { @@ -68,7 +70,7 @@ func Test_SetConfigChecksum(t *testing.T) { wantLabels := []metricdata.LabelValue{{Value: "test_service", Present: true}} SetConfigChecksum("test_service", wantValue) - 
testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), "config_checksum_decimal") + testMetricRetrieval(registry.registry.Read(), t, wantLabels, float64(wantValue), metrics.ConfigChecksumDecimal) } func Test_RegisterInfoMetrics(t *testing.T) { diff --git a/internal/telemetry/metrics/registry.go b/internal/telemetry/metrics/registry.go index 1d3efd80f..3f7e8fb42 100644 --- a/internal/telemetry/metrics/registry.go +++ b/internal/telemetry/metrics/registry.go @@ -9,6 +9,7 @@ import ( "github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/internal/version" + "github.com/pomerium/pomerium/pkg/metrics" ) var registry = newMetricRegistry() @@ -37,25 +38,31 @@ func (r *metricRegistry) init() { func() { r.registry = metric.NewRegistry() var err error - r.buildInfo, err = r.registry.AddInt64Gauge("build_info", + r.buildInfo, err = r.registry.AddInt64Gauge(metrics.BuildInfo, metric.WithDescription("Build Metadata"), - metric.WithLabelKeys("service", "version", "revision", "goversion"), + metric.WithLabelKeys( + metrics.ServiceLabel, + metrics.VersionLabel, + metrics.RevisionLabel, + metrics.GoVersionLabel, + metrics.HostLabel, + ), ) if err != nil { log.Error().Err(err).Msg("telemetry/metrics: failed to register build info metric") } - r.configChecksum, err = r.registry.AddFloat64Gauge("config_checksum_decimal", + r.configChecksum, err = r.registry.AddFloat64Gauge(metrics.ConfigChecksumDecimal, metric.WithDescription("Config checksum represented in decimal notation"), - metric.WithLabelKeys("service"), + metric.WithLabelKeys(metrics.ServiceLabel), ) if err != nil { log.Error().Err(err).Msg("telemetry/metrics: failed to register config checksum metric") } - r.policyCount, err = r.registry.AddInt64DerivedGauge("policy_count_total", + r.policyCount, err = r.registry.AddInt64DerivedGauge(metrics.PolicyCountTotal, metric.WithDescription("Total number of policies loaded"), - metric.WithLabelKeys("service"), + 
metric.WithLabelKeys(metrics.ServiceLabel), ) if err != nil { log.Error().Err(err).Msg("telemetry/metrics: failed to register policy count metric") @@ -65,7 +72,7 @@ func (r *metricRegistry) init() { // SetBuildInfo records the pomerium build info. You must call RegisterInfoMetrics to // have this exported -func (r *metricRegistry) setBuildInfo(service string) { +func (r *metricRegistry) setBuildInfo(service, hostname string) { if registry.buildInfo == nil { return } @@ -74,6 +81,7 @@ func (r *metricRegistry) setBuildInfo(service string) { metricdata.NewLabelValue(version.FullVersion()), metricdata.NewLabelValue(version.GitCommit), metricdata.NewLabelValue((runtime.Version())), + metricdata.NewLabelValue(hostname), ) if err != nil { log.Error().Err(err).Msg("telemetry/metrics: failed to get build info metric") diff --git a/pkg/metrics/constants.go b/pkg/metrics/constants.go new file mode 100644 index 000000000..62d75feee --- /dev/null +++ b/pkg/metrics/constants.go @@ -0,0 +1,28 @@ +// Package metrics declares the metric names and labels that pomerium exposes +// as constants so that they can be referenced from other projects. +package metrics + +// metrics +const ( + // ConfigLastReloadTimestampSeconds is the unix timestamp of the last successful configuration reload + ConfigLastReloadTimestampSeconds = "config_last_reload_success_timestamp" + // ConfigLastReloadSuccess is set to 1 if the last configuration reload was successful + ConfigLastReloadSuccess = "config_last_reload_success" + // IdentityManagerLastRefreshTimestamp is the timestamp of the last IdP directory refresh + IdentityManagerLastRefreshTimestamp = "identity_manager_last_refresh_timestamp" + // BuildInfo is a gauge that may be used to detect whether a component is live; its labels also carry the version + BuildInfo = "build_info" + // PolicyCountTotal is the total number of routes currently configured + PolicyCountTotal = "policy_count_total" + // ConfigChecksumDecimal should only be used to compare config on a single node; it will be different in a multi-node
environment + ConfigChecksumDecimal = "config_checksum_decimal" +) + +// labels +const ( + ServiceLabel = "service" + VersionLabel = "version" + RevisionLabel = "revision" + GoVersionLabel = "goversion" + HostLabel = "host" +)