policy: add outlier_detection (#1786)

* add support for cluster outlier detection

* add docs
This commit is contained in:
Caleb Doxsey 2021-01-20 08:33:48 -07:00 committed by GitHub
parent 0bc598f952
commit d9699cbcb9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1040 additions and 544 deletions

View file

@ -12,6 +12,7 @@ import (
"strings"
"time"
envoy_config_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
"github.com/golang/protobuf/ptypes"
"github.com/pomerium/pomerium/internal/hashutil"
@ -137,6 +138,9 @@ type Policy struct {
// to upstream requests.
EnableGoogleCloudServerlessAuthentication bool `mapstructure:"enable_google_cloud_serverless_authentication" yaml:"enable_google_cloud_serverless_authentication,omitempty"` //nolint
// OutlierDetection configures outlier detection for the upstream cluster.
OutlierDetection *PolicyOutlierDetection `mapstructure:"outlier_detection" yaml:"outlier_detection,omitempty" json:"outlier_detection,omitempty"`
SubPolicies []SubPolicy `mapstructure:"sub_policies" yaml:"sub_policies,omitempty" json:"sub_policies,omitempty"`
}
@ -163,6 +167,8 @@ type PolicyRedirect struct {
StripQuery *bool `mapstructure:"strip_query" yaml:"strip_query,omitempty" json:"strip_query,omitempty"`
}
// PolicyOutlierDetection configures outlier detection for a policy's upstream
// cluster. Its underlying type is envoy's cluster OutlierDetection message, so
// a *PolicyOutlierDetection can be converted directly to
// *envoy_config_cluster_v3.OutlierDetection when building the cluster.
type PolicyOutlierDetection envoy_config_cluster_v3.OutlierDetection
// NewPolicyFromProto creates a new Policy from a protobuf policy config route.
func NewPolicyFromProto(pb *configpb.Route) (*Policy, error) {
timeout, _ := ptypes.Duration(pb.GetTimeout())
@ -211,6 +217,30 @@ func NewPolicyFromProto(pb *configpb.Route) (*Policy, error) {
StripQuery: pb.Redirect.StripQuery,
}
}
if pb.OutlierDetection != nil {
p.OutlierDetection = &PolicyOutlierDetection{
Consecutive_5Xx: pb.OutlierDetection.Consecutive_5Xx,
Interval: pb.OutlierDetection.Interval,
BaseEjectionTime: pb.OutlierDetection.BaseEjectionTime,
MaxEjectionPercent: pb.OutlierDetection.MaxEjectionPercent,
EnforcingConsecutive_5Xx: pb.OutlierDetection.EnforcingConsecutive_5Xx,
EnforcingSuccessRate: pb.OutlierDetection.EnforcingSuccessRate,
SuccessRateMinimumHosts: pb.OutlierDetection.SuccessRateMinimumHosts,
SuccessRateRequestVolume: pb.OutlierDetection.SuccessRateRequestVolume,
SuccessRateStdevFactor: pb.OutlierDetection.SuccessRateStdevFactor,
ConsecutiveGatewayFailure: pb.OutlierDetection.ConsecutiveGatewayFailure,
EnforcingConsecutiveGatewayFailure: pb.OutlierDetection.EnforcingConsecutiveGatewayFailure,
SplitExternalLocalOriginErrors: pb.OutlierDetection.SplitExternalLocalOriginErrors,
ConsecutiveLocalOriginFailure: pb.OutlierDetection.ConsecutiveLocalOriginFailure,
EnforcingConsecutiveLocalOriginFailure: pb.OutlierDetection.EnforcingConsecutiveLocalOriginFailure,
EnforcingLocalOriginSuccessRate: pb.OutlierDetection.EnforcingLocalOriginSuccessRate,
FailurePercentageThreshold: pb.OutlierDetection.FailurePercentageThreshold,
EnforcingFailurePercentage: pb.OutlierDetection.EnforcingFailurePercentage,
EnforcingFailurePercentageLocalOrigin: pb.OutlierDetection.EnforcingFailurePercentageLocalOrigin,
FailurePercentageMinimumHosts: pb.OutlierDetection.FailurePercentageMinimumHosts,
FailurePercentageRequestVolume: pb.OutlierDetection.FailurePercentageRequestVolume,
}
}
for _, sp := range pb.GetPolicies() {
p.SubPolicies = append(p.SubPolicies, SubPolicy{
ID: sp.GetId(),
@ -286,6 +316,30 @@ func (p *Policy) ToProto() *configpb.Route {
StripQuery: p.Redirect.StripQuery,
}
}
if p.OutlierDetection != nil {
pb.OutlierDetection = &configpb.OutlierDetection{
Consecutive_5Xx: p.OutlierDetection.Consecutive_5Xx,
Interval: p.OutlierDetection.Interval,
BaseEjectionTime: p.OutlierDetection.BaseEjectionTime,
MaxEjectionPercent: p.OutlierDetection.MaxEjectionPercent,
EnforcingConsecutive_5Xx: p.OutlierDetection.EnforcingConsecutive_5Xx,
EnforcingSuccessRate: p.OutlierDetection.EnforcingSuccessRate,
SuccessRateMinimumHosts: p.OutlierDetection.SuccessRateMinimumHosts,
SuccessRateRequestVolume: p.OutlierDetection.SuccessRateRequestVolume,
SuccessRateStdevFactor: p.OutlierDetection.SuccessRateStdevFactor,
ConsecutiveGatewayFailure: p.OutlierDetection.ConsecutiveGatewayFailure,
EnforcingConsecutiveGatewayFailure: p.OutlierDetection.EnforcingConsecutiveGatewayFailure,
SplitExternalLocalOriginErrors: p.OutlierDetection.SplitExternalLocalOriginErrors,
ConsecutiveLocalOriginFailure: p.OutlierDetection.ConsecutiveLocalOriginFailure,
EnforcingConsecutiveLocalOriginFailure: p.OutlierDetection.EnforcingConsecutiveLocalOriginFailure,
EnforcingLocalOriginSuccessRate: p.OutlierDetection.EnforcingLocalOriginSuccessRate,
FailurePercentageThreshold: p.OutlierDetection.FailurePercentageThreshold,
EnforcingFailurePercentage: p.OutlierDetection.EnforcingFailurePercentage,
EnforcingFailurePercentageLocalOrigin: p.OutlierDetection.EnforcingFailurePercentageLocalOrigin,
FailurePercentageMinimumHosts: p.OutlierDetection.FailurePercentageMinimumHosts,
FailurePercentageRequestVolume: p.OutlierDetection.FailurePercentageRequestVolume,
}
}
return pb
}

View file

@ -1172,6 +1172,17 @@ If set, the route will only match incoming requests with a path that matches the
If set, the URL path will be rewritten according to the pattern and substitution, similar to `prefix_rewrite`.
### Outlier Detection
- `yaml`/`json` setting: `outlier_detection`
- Type: `object`
- Optional
- Example: `{ "consecutive_5xx": 12 }`
Outlier detection and ejection is the process of dynamically determining whether some number of hosts in an upstream cluster are performing unlike the others and removing them from the healthy load balancing set.
See the [Envoy documentation](https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/outlier#arch-overview-outlier-detection) for more details.
### Route Timeout
- `yaml`/`json` setting: `timeout`
- Type: [Go Duration](https://golang.org/pkg/time/#Duration.String) `string`

View file

@ -1296,6 +1296,17 @@ settings:
- Example: `{ "regex_rewrite_pattern":"^/service/([^/]+)(/.*)$", "regex_rewrite_substitution": "\\2/instance/\\1" }`
doc: |
If set, the URL path will be rewritten according to the pattern and substitution, similar to `prefix_rewrite`.
- name: "Outlier Detection"
keys: ["outlier_detection"]
attributes: |
- `yaml`/`json` setting: `outlier_detection`
- Type: `object`
- Optional
- Example: `{ "consecutive_5xx": 12 }`
doc: |
Outlier detection and ejection is the process of dynamically determining whether some number of hosts in an upstream cluster are performing unlike the others and removing them from the healthy load balancing set.
See the [Envoy documentation](https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/outlier#arch-overview-outlier-detection) for more details.
- name: "Route Timeout"
keys: ["timeout"]
attributes: |

View file

@ -6,7 +6,9 @@ import (
"path/filepath"
"testing"
envoy_config_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
"github.com/stretchr/testify/assert"
"google.golang.org/protobuf/types/known/wrapperspb"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/internal/testutil"
@ -207,7 +209,9 @@ func Test_buildCluster(t *testing.T) {
rootCAPath, _ := getRootCertificateAuthority()
rootCA := srv.filemgr.FileDataSource(rootCAPath).GetFilename()
t.Run("insecure", func(t *testing.T) {
cluster := buildCluster("example", mustParseURL("http://example.com"), nil, true, config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyV4Only))
cluster := buildCluster("example", mustParseURL("http://example.com"), nil, true,
config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyV4Only),
nil)
testutil.AssertProtoJSONEqual(t, `
{
"name": "example",
@ -242,7 +246,9 @@ func Test_buildCluster(t *testing.T) {
transportSocket := srv.buildPolicyTransportSocket(&config.Policy{
Destination: u,
})
cluster := buildCluster("example", u, transportSocket, true, config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto))
cluster := buildCluster("example", u, transportSocket, true,
config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto),
nil)
testutil.AssertProtoJSONEqual(t, `
{
"name": "example",
@ -298,7 +304,9 @@ func Test_buildCluster(t *testing.T) {
`, cluster)
})
t.Run("ip address", func(t *testing.T) {
cluster := buildCluster("example", mustParseURL("http://127.0.0.1"), nil, true, config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto))
cluster := buildCluster("example", mustParseURL("http://127.0.0.1"), nil, true,
config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto),
nil)
testutil.AssertProtoJSONEqual(t, `
{
"name": "example",
@ -328,7 +336,9 @@ func Test_buildCluster(t *testing.T) {
`, cluster)
})
t.Run("localhost", func(t *testing.T) {
cluster := buildCluster("example", mustParseURL("http://localhost"), nil, true, config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto))
cluster := buildCluster("example", mustParseURL("http://localhost"), nil, true,
config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyAuto),
nil)
testutil.AssertProtoJSONEqual(t, `
{
"name": "example",
@ -357,4 +367,44 @@ func Test_buildCluster(t *testing.T) {
}
`, cluster)
})
t.Run("outlier", func(t *testing.T) {
cluster := buildCluster("example", mustParseURL("http://example.com"), nil, true,
config.GetEnvoyDNSLookupFamily(config.DNSLookupFamilyV4Only),
&envoy_config_cluster_v3.OutlierDetection{
EnforcingConsecutive_5Xx: wrapperspb.UInt32(17),
SplitExternalLocalOriginErrors: true,
})
testutil.AssertProtoJSONEqual(t, `
{
"name": "example",
"type": "STRICT_DNS",
"connectTimeout": "10s",
"respectDnsTtl": true,
"http2ProtocolOptions": {
"allowConnect": true
},
"dnsLookupFamily": "V4_ONLY",
"outlierDetection": {
"enforcingConsecutive5xx": 17,
"splitExternalLocalOriginErrors": true
},
"loadAssignment": {
"clusterName": "example",
"endpoints": [{
"lbEndpoints": [{
"endpoint": {
"address": {
"socketAddress": {
"address": "example.com",
"ipv4Compat": true,
"portValue": 80
}
}
}
}]
}]
}
}
`, cluster)
})
}

View file

@ -54,7 +54,9 @@ func (srv *Server) buildClusters(options *config.Options) []*envoy_config_cluste
func (srv *Server) buildInternalCluster(options *config.Options, name string, endpoint *url.URL, forceHTTP2 bool) *envoy_config_cluster_v3.Cluster {
dnsLookupFamily := config.GetEnvoyDNSLookupFamily(options.DNSLookupFamily)
return buildCluster(name, endpoint, srv.buildInternalTransportSocket(options, endpoint), forceHTTP2, dnsLookupFamily)
return buildCluster(name, endpoint, srv.buildInternalTransportSocket(options, endpoint), forceHTTP2,
dnsLookupFamily,
nil)
}
func (srv *Server) buildPolicyCluster(options *config.Options, policy *config.Policy) *envoy_config_cluster_v3.Cluster {
@ -63,7 +65,9 @@ func (srv *Server) buildPolicyCluster(options *config.Options, policy *config.Po
if policy.EnableGoogleCloudServerlessAuthentication {
dnsLookupFamily = envoy_config_cluster_v3.Cluster_V4_ONLY
}
return buildCluster(name, policy.Destination, srv.buildPolicyTransportSocket(policy), false, dnsLookupFamily)
return buildCluster(name, policy.Destination, srv.buildPolicyTransportSocket(policy), false,
dnsLookupFamily,
(*envoy_config_cluster_v3.OutlierDetection)(policy.OutlierDetection))
}
func (srv *Server) buildInternalTransportSocket(options *config.Options, endpoint *url.URL) *envoy_config_core_v3.TransportSocket {
@ -201,6 +205,7 @@ func buildCluster(
transportSocket *envoy_config_core_v3.TransportSocket,
forceHTTP2 bool,
dnsLookupFamily envoy_config_cluster_v3.Cluster_DnsLookupFamily,
outlierDetection *envoy_config_cluster_v3.OutlierDetection,
) *envoy_config_cluster_v3.Cluster {
if endpoint == nil {
return nil
@ -233,9 +238,10 @@ func buildCluster(
}},
}},
},
RespectDnsTtl: true,
TransportSocket: transportSocket,
DnsLookupFamily: dnsLookupFamily,
RespectDnsTtl: true,
TransportSocket: transportSocket,
DnsLookupFamily: dnsLookupFamily,
OutlierDetection: outlierDetection,
}
if forceHTTP2 {

View file

@ -24,9 +24,8 @@ import (
envoy_config_endpoint_v3 "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
envoy_config_metrics_v3 "github.com/envoyproxy/go-control-plane/envoy/config/metrics/v3"
envoy_config_trace_v3 "github.com/envoyproxy/go-control-plane/envoy/config/trace/v3"
"github.com/google/go-cmp/cmp"
"github.com/golang/protobuf/proto"
"github.com/google/go-cmp/cmp"
"github.com/natefinch/atomic"
"github.com/rs/zerolog"
"google.golang.org/protobuf/encoding/protojson"

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@ option go_package = "github.com/pomerium/pomerium/pkg/grpc/config";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
message Config {
string name = 1;
@ -23,6 +24,29 @@ message RouteRedirect {
optional bool strip_query = 8;
}
// OutlierDetection holds the outlier detection settings for a route's
// upstream cluster (see Route.outlier_detection).
//
// NOTE(review): the field names and types appear to mirror Envoy's
// envoy.config.cluster.v3.OutlierDetection message one-to-one; confirm
// per-field semantics and defaults against the Envoy API reference.
//
// Most numeric fields use google.protobuf.UInt32Value wrappers, presumably so
// that an unset value can be distinguished from an explicit zero.
message OutlierDetection {
google.protobuf.UInt32Value consecutive_5xx = 1;
google.protobuf.Duration interval = 2;
google.protobuf.Duration base_ejection_time = 3;
google.protobuf.UInt32Value max_ejection_percent = 4;
google.protobuf.UInt32Value enforcing_consecutive_5xx = 5;
google.protobuf.UInt32Value enforcing_success_rate = 6;
google.protobuf.UInt32Value success_rate_minimum_hosts = 7;
google.protobuf.UInt32Value success_rate_request_volume = 8;
google.protobuf.UInt32Value success_rate_stdev_factor = 9;
google.protobuf.UInt32Value consecutive_gateway_failure = 10;
google.protobuf.UInt32Value enforcing_consecutive_gateway_failure = 11;
// Plain bool rather than a wrapper: unset and false are not distinguishable.
bool split_external_local_origin_errors = 12;
google.protobuf.UInt32Value consecutive_local_origin_failure = 13;
google.protobuf.UInt32Value enforcing_consecutive_local_origin_failure = 14;
google.protobuf.UInt32Value enforcing_local_origin_success_rate = 15;
google.protobuf.UInt32Value failure_percentage_threshold = 16;
google.protobuf.UInt32Value enforcing_failure_percentage = 17;
google.protobuf.UInt32Value enforcing_failure_percentage_local_origin = 18;
google.protobuf.UInt32Value failure_percentage_minimum_hosts = 19;
google.protobuf.UInt32Value failure_percentage_request_volume = 20;
}
message Route {
string name = 1;
@ -69,6 +93,8 @@ message Route {
string kubernetes_service_account_token = 26;
OutlierDetection outlier_detection = 35;
repeated Policy policies = 27;
string id = 28;
}