mirror of
https://github.com/pomerium/pomerium.git
synced 2025-04-29 02:16:28 +02:00
more idp metrics (#2842)
This commit is contained in:
parent
6b949a5c2e
commit
c19dd80fe6
4 changed files with 269 additions and 8 deletions
|
@ -510,6 +510,8 @@ Expose a prometheus endpoint on the specified port.
|
|||
|
||||
#### Pomerium Metrics Tracked
|
||||
|
||||
Each metric exposed by Pomerium has a `pomerium` prefix, which is omitted in the table below for brevity.
|
||||
|
||||
Name | Type | Description
|
||||
--------------------------------------------- | --------- | -----------------------------------------------------------------------
|
||||
grpc_client_request_duration_ms | Histogram | GRPC client request duration by service
|
||||
|
@ -528,21 +530,41 @@ http_server_request_duration_ms | Histogram | HTTP server request
|
|||
http_server_request_size_bytes | Histogram | HTTP server request size by service
|
||||
http_server_requests_total | Counter | Total HTTP server requests handled by service
|
||||
http_server_response_size_bytes | Histogram | HTTP server response size by service
|
||||
pomerium_build_info | Gauge | Pomerium build metadata by git revision, service, version and goversion
|
||||
pomerium_config_checksum_int64 | Gauge | Currently loaded configuration checksum by service
|
||||
pomerium_config_last_reload_success | Gauge | Whether the last configuration reload succeeded by service
|
||||
pomerium_config_last_reload_success_timestamp | Gauge | The timestamp of the last successful configuration reload by service
|
||||
build_info | Gauge | Pomerium build metadata by git revision, service, version and goversion
|
||||
config_checksum_int64 | Gauge | Currently loaded configuration checksum by service
|
||||
config_last_reload_success | Gauge | Whether the last configuration reload succeeded by service
|
||||
config_last_reload_success_timestamp | Gauge | The timestamp of the last successful configuration reload by service
|
||||
redis_conns | Gauge | Number of total connections in the pool
|
||||
redis_idle_conns | Gauge | Number of idle connections in the pool
|
||||
redis_wait_count_total | Counter | Total number of connections waited for
|
||||
redis_wait_duration_ms_total | Counter | Total time spent waiting for connections
|
||||
storage_operation_duration_ms | Histogram | Storage operation duration by operation, result, backend and service
|
||||
|
||||
#### Identity Manager
|
||||
|
||||
Identity manager metrics have the `pomerium_identity_manager` prefix, which is omitted in the table below for brevity.
|
||||
|
||||
Name | Type | Description
|
||||
--------------------------------------------- | --------- | -----------------------------------------------------------------------
|
||||
last_refresh_timestamp | Gauge | Timestamp of last directory refresh operation.
|
||||
last_user_refresh_success_timestamp | Gauge | Timestamp of the last successful user refresh.
|
||||
last_user_refresh_error_timestamp | Gauge | Timestamp of the last user refresh that ended in an error.
|
||||
last_user_refresh_errors | Counter | User refresh error counter.
|
||||
last_user_refresh_success | Counter | User refresh success counter.
|
||||
last_user_group_refresh_success_timestamp | Gauge | Timestamp of the last successful user group refresh.
|
||||
last_user_group_refresh_error_timestamp | Gauge | Timestamp of the last user group refresh that ended in an error.
|
||||
last_user_group_refresh_errors | Counter | User group refresh error counter.
|
||||
last_user_group_refresh_success | Counter | User group refresh success counter.
|
||||
last_session_refresh_success_timestamp | Gauge | Timestamp of the last successful session refresh.
|
||||
last_session_refresh_error_timestamp | Gauge | Timestamp of the last session refresh that ended in an error.
|
||||
last_session_refresh_errors | Counter | Session refresh error counter.
|
||||
last_session_refresh_success | Counter | Session refresh success counter.
|
||||
|
||||
#### Envoy Proxy Metrics
|
||||
|
||||
As of `v0.9`, Pomerium uses [envoy](https://www.envoyproxy.io/) for the data plane. As such, proxy related metrics are sourced from envoy, and use envoy's internal [stats data model](https://www.envoyproxy.io/docs/envoy/latest/operations/stats_overview). Please see Envoy's documentation for information about specific metrics.
|
||||
|
||||
All metrics coming from envoy will be labeled with `service="pomerium"` or `service="pomerium-proxy"`, depending if you're running all-in-one or distributed service mode.
|
||||
All metrics coming from envoy will be labeled with `service="pomerium"` or `service="pomerium-proxy"`, depending on whether you're running in all-in-one or distributed service mode, and will have the `pomerium` prefix added to the standard envoy metric name.
|
||||
|
||||
|
||||
### Metrics Basic Authentication
|
||||
|
|
|
@ -216,6 +216,7 @@ func (mgr *Manager) refreshDirectoryUserGroups(ctx context.Context) (nextRefresh
|
|||
defer clearTimeout()
|
||||
|
||||
directoryGroups, directoryUsers, err := mgr.cfg.Load().directory.UserGroups(ctx)
|
||||
metrics.RecordIdentityManagerUserGroupRefresh(ctx, err)
|
||||
if err != nil {
|
||||
msg := "failed to refresh directory users and groups"
|
||||
if ctx.Err() != nil {
|
||||
|
@ -234,7 +235,7 @@ func (mgr *Manager) refreshDirectoryUserGroups(ctx context.Context) (nextRefresh
|
|||
mgr.mergeGroups(ctx, directoryGroups)
|
||||
mgr.mergeUsers(ctx, directoryUsers)
|
||||
|
||||
metrics.RecordIdentityManagerLastRefresh()
|
||||
metrics.RecordIdentityManagerLastRefresh(ctx)
|
||||
|
||||
return mgr.cfg.Load().groupRefreshInterval
|
||||
}
|
||||
|
@ -404,6 +405,7 @@ func (mgr *Manager) refreshSession(ctx context.Context, userID, sessionID string
|
|||
}
|
||||
|
||||
newToken, err := mgr.cfg.Load().authenticator.Refresh(ctx, FromOAuthToken(s.OauthToken), &s)
|
||||
metrics.RecordIdentityManagerSessionRefresh(ctx, err)
|
||||
if isTemporaryError(err) {
|
||||
log.Error(ctx).Err(err).
|
||||
Str("user_id", s.GetUserId()).
|
||||
|
@ -472,6 +474,7 @@ func (mgr *Manager) refreshUser(ctx context.Context, userID string) {
|
|||
}
|
||||
|
||||
err := mgr.cfg.Load().authenticator.UpdateUserInfo(ctx, FromOAuthToken(s.OauthToken), &u)
|
||||
metrics.RecordIdentityManagerUserRefresh(ctx, err)
|
||||
if isTemporaryError(err) {
|
||||
log.Error(ctx).Err(err).
|
||||
Str("user_id", s.GetUserId()).
|
||||
|
|
|
@ -21,6 +21,22 @@ var (
|
|||
ConfigLastReloadView,
|
||||
ConfigLastReloadSuccessView,
|
||||
IdentityManagerLastRefreshView,
|
||||
|
||||
IdentityManagerLastUserRefreshErrorTimestampView,
|
||||
IdentityManagerLastUserRefreshErrorView,
|
||||
IdentityManagerLastUserRefreshSuccessTimestampView,
|
||||
IdentityManagerLastUserRefreshSuccessView,
|
||||
|
||||
IdentityManagerLastUserGroupRefreshErrorTimestampView,
|
||||
IdentityManagerLastUserGroupRefreshErrorView,
|
||||
IdentityManagerLastUserGroupRefreshSuccessTimestampView,
|
||||
IdentityManagerLastUserGroupRefreshSuccessView,
|
||||
|
||||
IdentityManagerLastSessionRefreshErrorTimestampView,
|
||||
IdentityManagerLastSessionRefreshErrorView,
|
||||
IdentityManagerLastSessionRefreshSuccessTimestampView,
|
||||
IdentityManagerLastSessionRefreshSuccessView,
|
||||
|
||||
ConfigDBVersionView,
|
||||
ConfigDBErrorsView,
|
||||
}
|
||||
|
@ -49,6 +65,69 @@ var (
|
|||
"seconds",
|
||||
)
|
||||
|
||||
identityManagerLastUserRefreshSuccessTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastUserRefreshSuccessTimestamp,
|
||||
"Timestamp of last successful directory user refresh success",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastUserRefreshErrorTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastUserRefreshErrorTimestamp,
|
||||
"Timestamp of last directory user refresh error",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastUserRefreshSuccess = stats.Int64(
|
||||
metrics.IdentityManagerLastUserRefreshSuccess,
|
||||
"Total successful directory user refresh requests",
|
||||
stats.UnitDimensionless,
|
||||
)
|
||||
// identityManagerLastUserRefreshError is the measure recorded once for every
// user refresh that ended in an error.
identityManagerLastUserRefreshError = stats.Int64(
	metrics.IdentityManagerLastUserRefreshError,
	// The description is surfaced as metric help text; the previous wording
	// ("Total successful ... errors") was self-contradictory.
	"Total directory user refresh errors",
	stats.UnitDimensionless,
)
|
||||
|
||||
identityManagerLastUserGroupRefreshSuccessTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastUserGroupRefreshSuccessTimestamp,
|
||||
"Timestamp of last successful user group refresh success",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastUserGroupRefreshErrorTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastUserGroupRefreshErrorTimestamp,
|
||||
"Timestamp of last directory user group refresh error",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastUserGroupRefreshSuccess = stats.Int64(
|
||||
metrics.IdentityManagerLastUserGroupRefreshSuccess,
|
||||
"Total successful directory user group refresh requests",
|
||||
stats.UnitDimensionless,
|
||||
)
|
||||
// identityManagerLastUserGroupRefreshError is the measure recorded once for
// every directory user group refresh that ended in an error.
identityManagerLastUserGroupRefreshError = stats.Int64(
	metrics.IdentityManagerLastUserGroupRefreshError,
	// The previous description was copied from the user refresh measure and
	// did not mention groups.
	"Total directory user group refresh errors",
	stats.UnitDimensionless,
)
|
||||
|
||||
identityManagerLastSessionRefreshSuccessTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastSessionRefreshSuccessTimestamp,
|
||||
"Timestamp of last successful session refresh success",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastSessionRefreshErrorTimestamp = stats.Int64(
|
||||
metrics.IdentityManagerLastSessionRefreshErrorTimestamp,
|
||||
"Timestamp of last session refresh error",
|
||||
stats.UnitSeconds,
|
||||
)
|
||||
identityManagerLastSessionRefreshSuccess = stats.Int64(
|
||||
metrics.IdentityManagerLastSessionRefreshSuccess,
|
||||
"Total successful session refresh requests",
|
||||
stats.UnitDimensionless,
|
||||
)
|
||||
// identityManagerLastSessionRefreshError is the measure recorded once for
// every session refresh that ended in an error.
identityManagerLastSessionRefreshError = stats.Int64(
	metrics.IdentityManagerLastSessionRefreshError,
	// The previous wording ("Total successful ... errors") was self-contradictory.
	"Total session refresh errors",
	stats.UnitDimensionless,
)
|
||||
|
||||
// ConfigDBVersionView contains last databroker config version that was processed
|
||||
ConfigDBVersionView = &view.View{
|
||||
Name: configDBVersion.Name(),
|
||||
|
@ -95,11 +174,140 @@ var (
|
|||
Measure: identityManagerLastRefresh,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
|
||||
// IdentityManagerLastUserRefreshSuccessView contains successful user refresh counter
|
||||
IdentityManagerLastUserRefreshSuccessView = &view.View{
|
||||
Name: identityManagerLastUserRefreshSuccess.Name(),
|
||||
Description: identityManagerLastUserRefreshSuccess.Description(),
|
||||
Measure: identityManagerLastUserRefreshSuccess,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastUserRefreshErrorView contains user refresh errors counter
|
||||
IdentityManagerLastUserRefreshErrorView = &view.View{
|
||||
Name: identityManagerLastUserRefreshError.Name(),
|
||||
Description: identityManagerLastUserRefreshError.Description(),
|
||||
Measure: identityManagerLastUserRefreshError,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastUserRefreshSuccessTimestampView contains the timestamp of the last successful user refresh
|
||||
IdentityManagerLastUserRefreshSuccessTimestampView = &view.View{
|
||||
Name: identityManagerLastUserRefreshSuccessTimestamp.Name(),
|
||||
Description: identityManagerLastUserRefreshSuccessTimestamp.Description(),
|
||||
Measure: identityManagerLastUserRefreshSuccessTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
// IdentityManagerLastUserRefreshErrorTimestampView contains the timestamp of the last user refresh error
|
||||
IdentityManagerLastUserRefreshErrorTimestampView = &view.View{
|
||||
Name: identityManagerLastUserRefreshErrorTimestamp.Name(),
|
||||
Description: identityManagerLastUserRefreshErrorTimestamp.Description(),
|
||||
Measure: identityManagerLastUserRefreshErrorTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
|
||||
// IdentityManagerLastUserGroupRefreshSuccessView contains successful user group refresh counter
|
||||
IdentityManagerLastUserGroupRefreshSuccessView = &view.View{
|
||||
Name: identityManagerLastUserGroupRefreshSuccess.Name(),
|
||||
Description: identityManagerLastUserGroupRefreshSuccess.Description(),
|
||||
Measure: identityManagerLastUserGroupRefreshSuccess,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastUserGroupRefreshErrorView contains user group refresh errors counter
|
||||
IdentityManagerLastUserGroupRefreshErrorView = &view.View{
|
||||
Name: identityManagerLastUserGroupRefreshError.Name(),
|
||||
Description: identityManagerLastUserGroupRefreshError.Description(),
|
||||
Measure: identityManagerLastUserGroupRefreshError,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastUserGroupRefreshSuccessTimestampView contains the timestamp of the last successful user group refresh
|
||||
IdentityManagerLastUserGroupRefreshSuccessTimestampView = &view.View{
|
||||
Name: identityManagerLastUserGroupRefreshSuccessTimestamp.Name(),
|
||||
Description: identityManagerLastUserGroupRefreshSuccessTimestamp.Description(),
|
||||
Measure: identityManagerLastUserGroupRefreshSuccessTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
// IdentityManagerLastUserGroupRefreshErrorTimestampView contains the timestamp of the last user group refresh error
|
||||
IdentityManagerLastUserGroupRefreshErrorTimestampView = &view.View{
|
||||
Name: identityManagerLastUserGroupRefreshErrorTimestamp.Name(),
|
||||
Description: identityManagerLastUserGroupRefreshErrorTimestamp.Description(),
|
||||
Measure: identityManagerLastUserGroupRefreshErrorTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
|
||||
// IdentityManagerLastSessionRefreshSuccessView contains successful session refresh counter
|
||||
IdentityManagerLastSessionRefreshSuccessView = &view.View{
|
||||
Name: identityManagerLastSessionRefreshSuccess.Name(),
|
||||
Description: identityManagerLastSessionRefreshSuccess.Description(),
|
||||
Measure: identityManagerLastSessionRefreshSuccess,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastSessionRefreshErrorView contains user refresh errors counter
|
||||
IdentityManagerLastSessionRefreshErrorView = &view.View{
|
||||
Name: identityManagerLastUserRefreshError.Name(),
|
||||
Description: identityManagerLastUserRefreshError.Description(),
|
||||
Measure: identityManagerLastUserRefreshError,
|
||||
Aggregation: view.Count(),
|
||||
}
|
||||
// IdentityManagerLastSessionRefreshSuccessTimestampView contains the timestamp of the last successful session refresh
|
||||
IdentityManagerLastSessionRefreshSuccessTimestampView = &view.View{
|
||||
Name: identityManagerLastSessionRefreshSuccessTimestamp.Name(),
|
||||
Description: identityManagerLastSessionRefreshSuccessTimestamp.Description(),
|
||||
Measure: identityManagerLastSessionRefreshSuccessTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
// IdentityManagerLastSessionRefreshErrorTimestampView contains the timestamp of the last session refresh error
|
||||
IdentityManagerLastSessionRefreshErrorTimestampView = &view.View{
|
||||
Name: identityManagerLastSessionRefreshErrorTimestamp.Name(),
|
||||
Description: identityManagerLastSessionRefreshErrorTimestamp.Description(),
|
||||
Measure: identityManagerLastSessionRefreshErrorTimestamp,
|
||||
Aggregation: view.LastValue(),
|
||||
}
|
||||
)
|
||||
|
||||
// RecordIdentityManagerLastRefresh records that the identity manager refreshed users and groups.
|
||||
func RecordIdentityManagerLastRefresh() {
|
||||
stats.Record(context.Background(), identityManagerLastRefresh.M(time.Now().Unix()))
|
||||
func RecordIdentityManagerLastRefresh(ctx context.Context) {
|
||||
stats.Record(ctx, identityManagerLastRefresh.M(time.Now().Unix()))
|
||||
}
|
||||
|
||||
// RecordIdentityManagerUserRefresh updates timestamp and counter for user refresh
|
||||
func RecordIdentityManagerUserRefresh(ctx context.Context, err error) {
|
||||
counter := identityManagerLastUserRefreshSuccess
|
||||
ts := identityManagerLastUserRefreshSuccessTimestamp
|
||||
if err != nil {
|
||||
counter = identityManagerLastUserRefreshError
|
||||
ts = identityManagerLastUserRefreshErrorTimestamp
|
||||
}
|
||||
stats.Record(ctx,
|
||||
ts.M(time.Now().Unix()),
|
||||
counter.M(1),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordIdentityManagerUserGroupRefresh updates timestamp and counter for user group update
|
||||
func RecordIdentityManagerUserGroupRefresh(ctx context.Context, err error) {
|
||||
counter := identityManagerLastUserGroupRefreshSuccess
|
||||
ts := identityManagerLastUserGroupRefreshSuccessTimestamp
|
||||
if err != nil {
|
||||
counter = identityManagerLastUserGroupRefreshError
|
||||
ts = identityManagerLastUserGroupRefreshErrorTimestamp
|
||||
}
|
||||
stats.Record(ctx,
|
||||
ts.M(time.Now().Unix()),
|
||||
counter.M(1),
|
||||
)
|
||||
}
|
||||
|
||||
// RecordIdentityManagerSessionRefresh updates timestamp and counter for session refresh
|
||||
func RecordIdentityManagerSessionRefresh(ctx context.Context, err error) {
|
||||
counter := identityManagerLastSessionRefreshSuccess
|
||||
ts := identityManagerLastSessionRefreshSuccessTimestamp
|
||||
if err != nil {
|
||||
counter = identityManagerLastSessionRefreshError
|
||||
ts = identityManagerLastSessionRefreshErrorTimestamp
|
||||
}
|
||||
stats.Record(ctx,
|
||||
ts.M(time.Now().Unix()),
|
||||
counter.M(1),
|
||||
)
|
||||
}
|
||||
|
||||
// SetDBConfigInfo records status, databroker version and error count while parsing
|
||||
|
|
|
@ -13,6 +13,34 @@ const (
|
|||
ConfigLastReloadSuccess = "config_last_reload_success"
|
||||
// IdentityManagerLastRefreshTimestamp is IdP sync timestamp
|
||||
IdentityManagerLastRefreshTimestamp = "identity_manager_last_refresh_timestamp"
|
||||
|
||||
// IdentityManagerLastUserRefreshSuccessTimestamp is a timestamp of last user refresh
|
||||
IdentityManagerLastUserRefreshSuccessTimestamp = "identity_manager_last_user_refresh_success_timestamp"
|
||||
// IdentityManagerLastUserRefreshErrorTimestamp is a timestamp of last user refresh error
|
||||
IdentityManagerLastUserRefreshErrorTimestamp = "identity_manager_last_user_refresh_error_timestamp"
|
||||
// IdentityManagerLastUserRefreshError is a counter of last user refresh errors
|
||||
IdentityManagerLastUserRefreshError = "identity_manager_last_user_refresh_errors"
|
||||
// IdentityManagerLastUserRefreshSuccess is a counter of last user refresh success
|
||||
IdentityManagerLastUserRefreshSuccess = "identity_manager_last_user_refresh_success"
|
||||
|
||||
// IdentityManagerLastUserGroupRefreshSuccessTimestamp is a timestamp of last user group refresh
|
||||
IdentityManagerLastUserGroupRefreshSuccessTimestamp = "identity_manager_last_user_group_refresh_success_timestamp"
|
||||
// IdentityManagerLastUserGroupRefreshErrorTimestamp is a timestamp of last user group refresh error
|
||||
IdentityManagerLastUserGroupRefreshErrorTimestamp = "identity_manager_last_user_group_refresh_error_timestamp"
|
||||
// IdentityManagerLastUserGroupRefreshError is a counter of last user group refresh errors
|
||||
IdentityManagerLastUserGroupRefreshError = "identity_manager_last_user_group_refresh_errors"
|
||||
// IdentityManagerLastUserGroupRefreshSuccess is a counter of last user group refresh success
|
||||
IdentityManagerLastUserGroupRefreshSuccess = "identity_manager_last_user_group_refresh_success"
|
||||
|
||||
// IdentityManagerLastSessionRefreshSuccessTimestamp is a timestamp of last session refresh
|
||||
IdentityManagerLastSessionRefreshSuccessTimestamp = "identity_manager_last_session_refresh_success_timestamp"
|
||||
// IdentityManagerLastSessionRefreshErrorTimestamp is a timestamp of last session refresh error
|
||||
IdentityManagerLastSessionRefreshErrorTimestamp = "identity_manager_last_session_refresh_error_timestamp"
|
||||
// IdentityManagerLastSessionRefreshError is a counter of last session refresh errors
|
||||
IdentityManagerLastSessionRefreshError = "identity_manager_last_session_refresh_errors"
|
||||
// IdentityManagerLastSessionRefreshSuccess is a counter of last session refresh success
|
||||
IdentityManagerLastSessionRefreshSuccess = "identity_manager_last_session_refresh_success"
|
||||
|
||||
// BuildInfo is a gauge that may be used to detect whether component is live, and also has version
|
||||
BuildInfo = "build_info"
|
||||
// PolicyCountTotal is total amount of routes currently configured
|
||||
|
|
Loading…
Add table
Reference in a new issue