GRPC metrics improvements

- change to ocgrpc plugin
- rename labels to be more consistent
- refactor view registration patterns
- add server metrics
- add client request size metrics
This commit is contained in:
Travis Groth 2019-07-10 22:35:56 -04:00
parent d0f1314286
commit 4bd4b27f28
11 changed files with 273 additions and 152 deletions

View file

@ -3,100 +3,100 @@ package metrics // import "github.com/pomerium/pomerium/internal/metrics"
import (
"context"
"strings"
"time"
"github.com/golang/protobuf/proto"
"github.com/pomerium/pomerium/internal/log"
"go.opencensus.io/stats"
"go.opencensus.io/plugin/ocgrpc"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
"google.golang.org/grpc"
"google.golang.org/grpc/status"
grpcstats "google.golang.org/grpc/stats"
)
var (
grpcServerRequestCount = stats.Int64("grpc_server_requests_total", "Total grpc Requests", "1")
grpcServerResponseSize = stats.Int64("grpc_server_response_size_bytes", "grpc Server Response Size in bytes", "bytes")
grpcServerRequestDuration = stats.Int64("grpc_server_request_duration_ms", "grpc Request duration in ms", "ms")
grpcSizeDistribution = view.Distribution(
1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
2048, 4096, 8192, 16384,
)
grcpLatencyDistribution = view.Distribution(
1, 2, 5, 7, 10, 25, 50, 75,
100, 250, 500, 750, 1000,
)
grpcClientRequestCount = stats.Int64("grpc_client_requests_total", "Total grpc Client Requests", "1")
grpcClientResponseSize = stats.Int64("grpc_client_response_size_bytes", "grpc Client Response Size in bytes", "bytes")
grpcClientRequestDuration = stats.Int64("grpc_client_request_duration_ms", "grpc Client Request duration in ms", "ms")
// GRPCServerRequestCountView is an OpenCensus view which tracks GRPC Server requests by pomerium service, host, grpc service, grpc method, and status
// GRPCServerRequestCountView is an OpenCensus view which counts GRPC Server requests by pomerium service, grpc service, grpc method, and status
GRPCServerRequestCountView = &view.View{
Name: grpcServerRequestCount.Name(),
Measure: grpcServerRequestCount,
Description: grpcServerRequestCount.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Name: "grpc_server_requests_total",
Measure: ocgrpc.ServerLatency,
Description: "Total grpc Requests",
TagKeys: []tag.Key{keyService, keyGRPCMethod, ocgrpc.KeyServerStatus, keyGRPCService},
Aggregation: view.Count(),
}
// GRPCServerRequestDurationView is an OpenCensus view which tracks GRPC Server request duration by pomerium service, host, grpc service, grpc method, and statu
// GRPCServerRequestDurationView is an OpenCensus view which tracks GRPC Server request duration by pomerium service, grpc service, grpc method, and status
GRPCServerRequestDurationView = &view.View{
Name: grpcServerRequestDuration.Name(),
Measure: grpcServerRequestDuration,
Description: grpcServerRequestDuration.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Aggregation: view.Distribution(
1, 2, 5, 7, 10, 25, 500, 750,
100, 250, 500, 750,
1000, 2500, 5000, 7500,
10000, 25000, 50000, 75000,
100000,
),
Name: "grpc_server_request_duration_ms",
Measure: ocgrpc.ServerLatency,
Description: "grpc Request duration in ms",
TagKeys: []tag.Key{keyService, keyGRPCMethod, ocgrpc.KeyServerStatus, keyGRPCService},
Aggregation: grcpLatencyDistribution,
}
// GRPCServerResponseSizeView is an OpenCensus view which tracks GRPC Server request duration by pomerium service, host, grpc service, grpc method, and statu
// GRPCServerResponseSizeView is an OpenCensus view which tracks GRPC Server response size by pomerium service, grpc service, grpc method, and status
GRPCServerResponseSizeView = &view.View{
Name: grpcServerResponseSize.Name(),
Measure: grpcServerResponseSize,
Description: grpcServerResponseSize.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Aggregation: view.Distribution(
1, 256, 512, 1024, 2048, 8192, 16384, 32768, 65536, 131072, 262144, 524288,
1048576, 2097152, 4194304, 8388608,
),
Name: "grpc_server_response_size_bytes",
Measure: ocgrpc.ServerSentBytesPerRPC,
Description: "grpc Server Response Size in bytes",
TagKeys: []tag.Key{keyService, keyGRPCMethod, ocgrpc.KeyServerStatus, keyGRPCService},
Aggregation: grpcSizeDistribution,
}
// GRPCClientRequestCountView is an OpenCensus view which tracks GRPC Client requests by pomerium service, target host, grpc service, grpc method, and statu
// GRPCServerRequestSizeView is an OpenCensus view which tracks GRPC Server request size by pomerium service, grpc service, grpc method, and status
GRPCServerRequestSizeView = &view.View{
Name: "grpc_server_request_size_bytes",
Measure: ocgrpc.ServerReceivedBytesPerRPC,
Description: "grpc Server Request Size in bytes",
TagKeys: []tag.Key{keyService, keyGRPCMethod, ocgrpc.KeyServerStatus, keyGRPCService},
Aggregation: grpcSizeDistribution,
}
// GRPCClientRequestCountView is an OpenCensus view which tracks GRPC Client requests by pomerium service, target host, grpc service, grpc method, and status
GRPCClientRequestCountView = &view.View{
Name: grpcClientRequestCount.Name(),
Measure: grpcClientRequestCount,
Description: grpcClientRequestCount.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Name: "grpc_client_requests_total",
Measure: ocgrpc.ClientRoundtripLatency,
Description: "Total grpc Client Requests",
TagKeys: []tag.Key{keyService, keyHost, keyGRPCMethod, keyGRPCService, ocgrpc.KeyClientStatus},
Aggregation: view.Count(),
}
// GRPCClientRequestDurationView is an OpenCensus view which tracks GRPC Client request duration by pomerium service, target host, grpc service, grpc method, and statu
// GRPCClientRequestDurationView is an OpenCensus view which tracks GRPC Client request duration by pomerium service, target host, grpc service, grpc method, and status
GRPCClientRequestDurationView = &view.View{
Name: grpcClientRequestDuration.Name(),
Measure: grpcClientRequestDuration,
Description: grpcClientRequestDuration.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Aggregation: view.Distribution(
1, 2, 5, 7, 10, 25, 500, 750,
100, 250, 500, 750,
1000, 2500, 5000, 7500,
10000, 25000, 50000, 75000,
100000,
),
Name: "grpc_client_request_duration_ms",
Measure: ocgrpc.ClientRoundtripLatency,
Description: "grpc Client Request duration in ms",
TagKeys: []tag.Key{keyService, keyHost, keyGRPCMethod, keyGRPCService, ocgrpc.KeyClientStatus},
Aggregation: grcpLatencyDistribution,
}
// GRPCClientResponseSizeView is an OpenCensus view which tracks GRPC Client response size by pomerium service, target host, grpc service, grpc method, and statu
// GRPCClientResponseSizeView is an OpenCensus view which tracks GRPC Client response size by pomerium service, target host, grpc service, grpc method, and status
GRPCClientResponseSizeView = &view.View{
Name: grpcClientResponseSize.Name(),
Measure: grpcClientResponseSize,
Description: grpcClientResponseSize.Description(),
TagKeys: []tag.Key{keyService, keyHost, keyMethod, keyStatus, keyGRPCService},
Aggregation: view.Distribution(
1, 256, 512, 1024, 2048, 8192, 16384, 32768, 65536, 131072, 262144, 524288,
1048576, 2097152, 4194304, 8388608,
),
Name: "grpc_client_response_size_bytes",
Measure: ocgrpc.ClientReceivedBytesPerRPC,
Description: "grpc Client Response Size in bytes",
TagKeys: []tag.Key{keyService, keyHost, keyGRPCMethod, keyGRPCService, ocgrpc.KeyClientStatus},
Aggregation: grpcSizeDistribution,
}
// GRPCClientRequestSizeView is an OpenCensus view which tracks GRPC Client request size by pomerium service, target host, grpc service, grpc method, and status
GRPCClientRequestSizeView = &view.View{
Name: "grpc_client_request_size_bytes",
Measure: ocgrpc.ClientSentBytesPerRPC,
Description: "grpc Client Request Size in bytes",
TagKeys: []tag.Key{keyService, keyHost, keyGRPCMethod, keyGRPCService, ocgrpc.KeyClientStatus},
Aggregation: grpcSizeDistribution,
}
)
// GRPCClientInterceptor creates a UnaryClientInterceptor which tracks metrics of grpc client requests
// GRPCClientInterceptor creates a UnaryClientInterceptor which updates the RPC context with metric tag
// metadata
func GRPCClientInterceptor(service string) grpc.UnaryClientInterceptor {
return func(
ctx context.Context,
@ -107,11 +107,6 @@ func GRPCClientInterceptor(service string) grpc.UnaryClientInterceptor {
invoker grpc.UnaryInvoker,
opts ...grpc.CallOption) error {
startTime := time.Now()
// Calls the invoker to execute RPC
err := invoker(ctx, method, req, reply, cc, opts...)
// Split the method into parts for better slicing
rpcInfo := strings.SplitN(method, "/", 3)
var rpcMethod string
@ -121,30 +116,62 @@ func GRPCClientInterceptor(service string) grpc.UnaryClientInterceptor {
rpcMethod = rpcInfo[2]
}
responseStatus, _ := status.FromError(err)
ctx, tagErr := tag.New(
context.Background(),
taggedCtx, tagErr := tag.New(
ctx,
tag.Insert(keyService, service),
tag.Insert(keyHost, cc.Target()),
tag.Insert(keyMethod, rpcMethod),
tag.Insert(keyGRPCMethod, rpcMethod),
tag.Insert(keyGRPCService, rpcService),
tag.Insert(keyStatus, responseStatus.Code().String()),
)
if tagErr != nil {
log.Warn().Err(tagErr).Str("context", "HTTPMetricsRoundTripper").Msg("Failed to create context tag")
} else {
responseProto := reply.(proto.Message)
responseSize := proto.Size(responseProto)
stats.Record(ctx,
grpcClientRequestCount.M(1),
grpcClientRequestDuration.M(time.Since(startTime).Nanoseconds()/int64(time.Millisecond)),
grpcClientResponseSize.M(int64(responseSize)),
)
log.Warn().Err(tagErr).Str("context", "GRPCClientInterceptor").Msg("internal/metrics: Failed to create context")
return invoker(ctx, method, req, reply, cc, opts...)
}
return err
// Calls the invoker to execute RPC
return invoker(taggedCtx, method, req, reply, cc, opts...)
}
}
// GRPCServerStatsHandler provides a grpc stats.Handler for a pomerium service to add tags and track
// metrics to server side calls
//
// It wraps another grpcstats.Handler by embedding it: TagRPC is overridden to
// add pomerium-specific tags, while the remaining handler methods are the
// embedded handler's own.
type GRPCServerStatsHandler struct {
// service is the pomerium service name that TagRPC writes to the keyService tag.
service string
// Handler is the wrapped stats handler; TagRPC calls it first and tags the
// context it returns.
grpcstats.Handler
}
// TagRPC implements grpc.stats.Handler and adds tags to the context of a given RPC.
//
// The embedded Handler's TagRPC runs first. The pomerium service name and the
// gRPC service and method parsed from tagInfo.FullMethodName are then written
// onto the context it returns. If the tags cannot be applied, a warning is
// logged and the embedded handler's context is returned unchanged.
func (h *GRPCServerStatsHandler) TagRPC(ctx context.Context, tagInfo *grpcstats.RPCTagInfo) context.Context {
	handledCtx := h.Handler.TagRPC(ctx, tagInfo)

	// Split the method into parts for better slicing. A full method name of the
	// form "/service/method" yields ["", "service", "method"]; any other shape
	// leaves both values empty.
	var rpcService, rpcMethod string
	if rpcInfo := strings.SplitN(tagInfo.FullMethodName, "/", 3); len(rpcInfo) == 3 {
		rpcService, rpcMethod = rpcInfo[1], rpcInfo[2]
	}

	// Upsert rather than Insert: Insert leaves an existing value untouched, so
	// any of these keys already present on handledCtx (for example tags
	// propagated by the calling client and decoded by the embedded handler)
	// would otherwise label this server's metrics with the caller's values.
	taggedCtx, tagErr := tag.New(
		handledCtx,
		tag.Upsert(keyService, h.service),
		tag.Upsert(keyGRPCMethod, rpcMethod),
		tag.Upsert(keyGRPCService, rpcService),
	)
	if tagErr != nil {
		log.Warn().Err(tagErr).Str("context", "GRPCServerStatsHandler").Msg("internal/metrics: Failed to create context")
		return handledCtx
	}

	return taggedCtx
}
// NewGRPCServerStatsHandler builds the stats handler for the named pomerium
// service. The returned handler wraps a fresh ocgrpc.ServerHandler and tags
// every RPC it sees with the given service name.
func NewGRPCServerStatsHandler(service string) grpcstats.Handler {
	handler := &GRPCServerStatsHandler{
		Handler: &ocgrpc.ServerHandler{},
		service: service,
	}
	return handler
}