envoy: enable TCP keepalive for internal clusters (#4902)

In split service mode, and during periods of inactivity, the gRPC
connections to the databroker may fall idle. Some network firewalls may
eventually time out an idle TCP connection and even start dropping
subsequent packets once connection traffic resumes. Combined with Linux
default TCP retransmission settings, this could cause a broken
connection to persist for over 15 minutes.

In an attempt to avoid this scenario, enable TCP keepalive for outbound
gRPC connections, matching the Go standard library default settings for
time & interval: 15 seconds for both. (The probe count does not appear
to be set, so it will remain at the OS default.)

Add a test case exercising the BuildClusters() method with the default
configuration options, comparing the results with a reference "golden"
file in the testdata directory. Also add an '-update' flag to make it
easier to update the reference golden file when needed:

  go test ./config/envoyconfig -update
This commit is contained in:
Kenneth Jenkins 2024-01-11 09:12:45 -08:00 committed by GitHub
parent 6efef022af
commit c7c2087483
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 349 additions and 4 deletions

View file

@ -55,7 +55,7 @@ issues:
exclude-rules: exclude-rules:
# Exclude some linters from running on test files. # Exclude some linters from running on test files.
- path: _test\.go$|^tests/|^integration/|^samples/|templates\.go$ - path: _test\.go$|^tests/|^integration/|^samples/|^internal/testutil/|templates\.go$
linters: linters:
- bodyclose - bodyclose
- errcheck - errcheck

View file

@ -142,6 +142,14 @@ func (b *Builder) buildInternalCluster(
) (*envoy_config_cluster_v3.Cluster, error) { ) (*envoy_config_cluster_v3.Cluster, error) {
cluster := newDefaultEnvoyClusterConfig() cluster := newDefaultEnvoyClusterConfig()
cluster.DnsLookupFamily = config.GetEnvoyDNSLookupFamily(cfg.Options.DNSLookupFamily) cluster.DnsLookupFamily = config.GetEnvoyDNSLookupFamily(cfg.Options.DNSLookupFamily)
// Match the Go standard library default TCP keepalive settings.
const keepaliveTimeSeconds = 15
cluster.UpstreamConnectionOptions = &envoy_config_cluster_v3.UpstreamConnectionOptions{
TcpKeepalive: &envoy_config_core_v3.TcpKeepalive{
KeepaliveTime: wrapperspb.UInt32(keepaliveTimeSeconds),
KeepaliveInterval: wrapperspb.UInt32(keepaliveTimeSeconds),
},
}
var endpoints []Endpoint var endpoints []Endpoint
for _, dst := range dsts { for _, dst := range dsts {
ts, err := b.buildInternalTransportSocket(ctx, cfg, dst) ts, err := b.buildInternalTransportSocket(ctx, cfg, dst)

View file

@ -20,6 +20,23 @@ import (
"github.com/pomerium/pomerium/pkg/cryptutil" "github.com/pomerium/pomerium/pkg/cryptutil"
) )
// Test_BuildClusters builds the clusters for the default configuration
// options and compares the result with the reference file
// testdata/clusters.json.
func Test_BuildClusters(t *testing.T) {
	// envoyAdminAddressPath is based on os.TempDir() and therefore differs
	// between systems; pin it to a fixed location for this test and put the
	// previous value back once the test finishes.
	savedAdminPath := envoyAdminAddressPath
	t.Cleanup(func() { envoyAdminAddressPath = savedAdminPath })
	envoyAdminAddressPath = "/tmp/pomerium-envoy-admin.sock"

	cfg := &config.Config{Options: config.NewDefaultOptions()}
	builder := New("local-grpc", "local-http", "local-metrics", filemgr.NewManager(), nil)

	clusters, err := builder.BuildClusters(context.Background(), cfg)
	require.NoError(t, err)
	testutil.AssertProtoJSONFileEqual(t, "testdata/clusters.json", clusters)
}
func Test_buildPolicyTransportSocket(t *testing.T) { func Test_buildPolicyTransportSocket(t *testing.T) {
ctx := context.Background() ctx := context.Background()
cacheDir, _ := os.UserCacheDir() cacheDir, _ := os.UserCacheDir()

View file

@ -0,0 +1,289 @@
[
{
"loadAssignment": {
"clusterName": "pomerium-acme-tls-alpn",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "127.0.0.1",
"portValue": 0
}
}
}
}
]
}
]
},
"name": "pomerium-acme-tls-alpn"
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-grpc",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-grpc",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {
"keepaliveInterval": 15,
"keepaliveTime": 15
}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-http",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-http",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-http",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"httpProtocolOptions": {
"headerKeyFormat": {
"statefulFormatter": {
"name": "preserve_case",
"typedConfig": {
"@type": "type.googleapis.com/envoy.extensions.http.header_formatters.preserve_case.v3.PreserveCaseFormatterConfig"
}
}
}
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {
"keepaliveInterval": 15,
"keepaliveTime": 15
}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-metrics",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-metrics",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-metrics",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"httpProtocolOptions": {
"headerKeyFormat": {
"statefulFormatter": {
"name": "preserve_case",
"typedConfig": {
"@type": "type.googleapis.com/envoy.extensions.http.header_formatters.preserve_case.v3.PreserveCaseFormatterConfig"
}
}
}
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {
"keepaliveInterval": 15,
"keepaliveTime": 15
}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-authorize",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-authorize",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {
"keepaliveInterval": 15,
"keepaliveTime": 15
}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-databroker",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-databroker",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {
"keepaliveInterval": 15,
"keepaliveTime": 15
}
}
},
{
"connectTimeout": "10s",
"loadAssignment": {
"clusterName": "pomerium-envoy-admin",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"pipe": {
"path": "/tmp/pomerium-envoy-admin.sock"
}
}
}
}
]
}
]
},
"name": "pomerium-envoy-admin"
}
]

View file

@ -3,6 +3,7 @@ package testutil
import ( import (
"encoding/json" "encoding/json"
"flag"
"os" "os"
"path/filepath" "path/filepath"
"reflect" "reflect"
@ -11,6 +12,7 @@ import (
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/testing/protocmp" "google.golang.org/protobuf/testing/protocmp"
@ -29,6 +31,11 @@ func AssertProtoEqual(t *testing.T, expected, actual interface{}, msgAndArgs ...
// of protobuf messages. // of protobuf messages.
func AssertProtoJSONEqual(t *testing.T, expected string, protoMsg interface{}, msgAndArgs ...interface{}) bool { func AssertProtoJSONEqual(t *testing.T, expected string, protoMsg interface{}, msgAndArgs ...interface{}) bool {
t.Helper() t.Helper()
formattedJSON := formattedProtoJSON(protoMsg)
return assert.Equal(t, reformatJSON(json.RawMessage(expected)), formattedJSON, msgAndArgs...)
}
func formattedProtoJSON(protoMsg interface{}) string {
protoMsgVal := reflect.ValueOf(protoMsg) protoMsgVal := reflect.ValueOf(protoMsg)
if protoMsgVal.Kind() == reflect.Slice { if protoMsgVal.Kind() == reflect.Slice {
var protoMsgs []json.RawMessage var protoMsgs []json.RawMessage
@ -36,10 +43,9 @@ func AssertProtoJSONEqual(t *testing.T, expected string, protoMsg interface{}, m
protoMsgs = append(protoMsgs, toProtoJSON(protoMsgVal.Index(i).Interface())) protoMsgs = append(protoMsgs, toProtoJSON(protoMsgVal.Index(i).Interface()))
} }
bs, _ := json.Marshal(protoMsgs) bs, _ := json.Marshal(protoMsgs)
return assert.Equal(t, reformatJSON(json.RawMessage(expected)), reformatJSON(bs), msgAndArgs...) return reformatJSON(bs)
} }
return reformatJSON(toProtoJSON(protoMsg))
return assert.Equal(t, reformatJSON(json.RawMessage(expected)), reformatJSON(toProtoJSON(protoMsg)), msgAndArgs...)
} }
func reformatJSON(raw json.RawMessage) string { func reformatJSON(raw json.RawMessage) string {
@ -54,6 +60,31 @@ func toProtoJSON(protoMsg interface{}) json.RawMessage {
return bs return bs
} }
// updateFlag is the '-update' test flag. When it is set,
// AssertProtoJSONFileEqual overwrites the reference file with the current
// output instead of comparing against it.
var updateFlag = flag.Bool("update", false,
"when enabled, reference files will be updated to match current behavior")
// AssertProtoJSONFileEqual asserts that a protobuf message (or slice of
// messages) matches the given reference JSON file.
//
// To update a reference JSON file, pass the test argument '-update'. This will
// overwrite the reference output to match the current behavior, creating the
// file (and any missing parent directories) if it does not exist yet. In
// update mode no comparison is performed: the return value only reports
// whether the reference file could be written.
//
// When not updating, a reference file that cannot be read fails the test
// immediately; otherwise the result of the JSON comparison is returned.
func AssertProtoJSONFileEqual(
	t *testing.T, file string, protoMsg interface{}, msgAndArgs ...interface{},
) bool {
	t.Helper()

	if *updateFlag {
		// A brand-new reference file may live in a directory that has not
		// been created yet, and os.WriteFile does not create parent
		// directories on its own.
		if err := os.MkdirAll(filepath.Dir(file), 0o755); !assert.NoError(t, err) {
			return false
		}
		// The reference file is written with a trailing newline.
		updatedJSON := formattedProtoJSON(protoMsg) + "\n"
		return assert.NoError(t, os.WriteFile(file, []byte(updatedJSON), 0o644))
	}

	expected, err := os.ReadFile(file)
	require.NoError(t, err)
	return AssertProtoJSONEqual(t, string(expected), protoMsg, msgAndArgs...)
}
// ModRoot returns the directory containing the go.mod file. // ModRoot returns the directory containing the go.mod file.
func ModRoot() string { func ModRoot() string {
dir, err := os.Getwd() dir, err := os.Getwd()