envoy: enable TCP keepalive for internal clusters

In split service mode, and during periods of inactivity, the gRPC
connections to the databroker may fall idle. Some network firewalls may
eventually time out an idle TCP connection and even start dropping
subsequent packets once connection traffic resumes. Combined with Linux
default TCP retransmission settings, this could cause a broken
connection to persist for over 15 minutes.

In an attempt to avoid this scenario, enable TCP keepalive for outbound
gRPC connections, using the OS default settings for time / interval /
probe count. (These OS settings may need to be adjusted depending on
specific network firewall configuration settings.)

Add a test case exercising the BuildClusters() method with the default
configuration options, comparing the results with a reference "golden"
file in the testdata directory. Also add an '-update' flag to make it
easier to regenerate the reference golden file when needed:

  go test ./config/envoyconfig -update
This commit is contained in:
Kenneth Jenkins 2023-12-15 10:00:45 -08:00
parent c84a251c93
commit 78fe503b57
5 changed files with 330 additions and 4 deletions

View file

@ -55,7 +55,7 @@ issues:
exclude-rules:
# Exclude some linters from running on test files.
- path: _test\.go$|^tests/|^integration/|^samples/|templates\.go$
- path: _test\.go$|^tests/|^integration/|^samples/|^internal/testutil/|templates\.go$
linters:
- bodyclose
- errcheck

View file

@ -142,6 +142,10 @@ func (b *Builder) buildInternalCluster(
) (*envoy_config_cluster_v3.Cluster, error) {
cluster := newDefaultEnvoyClusterConfig()
cluster.DnsLookupFamily = config.GetEnvoyDNSLookupFamily(cfg.Options.DNSLookupFamily)
cluster.UpstreamConnectionOptions = &envoy_config_cluster_v3.UpstreamConnectionOptions{
// Enable TCP keepalive with OS default settings.
TcpKeepalive: &envoy_config_core_v3.TcpKeepalive{},
}
var endpoints []Endpoint
for _, dst := range dsts {
ts, err := b.buildInternalTransportSocket(ctx, cfg, dst)

View file

@ -20,6 +20,23 @@ import (
"github.com/pomerium/pomerium/pkg/cryptutil"
)
// Test_BuildClusters builds the clusters for the default configuration options
// and compares the result against the reference file testdata/clusters.json.
func Test_BuildClusters(t *testing.T) {
	// envoyAdminAddressPath is derived from os.TempDir() and therefore differs
	// between systems; pin it to a fixed location while this test runs so the
	// output matches the reference file, and restore it afterwards.
	savedAdminPath := envoyAdminAddressPath
	t.Cleanup(func() { envoyAdminAddressPath = savedAdminPath })
	envoyAdminAddressPath = "/tmp/pomerium-envoy-admin.sock"

	cfg := &config.Config{Options: config.NewDefaultOptions()}
	builder := New("local-grpc", "local-http", "local-metrics", filemgr.NewManager(), nil)

	got, err := builder.BuildClusters(context.Background(), cfg)
	require.NoError(t, err)
	testutil.AssertProtoJSONFileEqual(t, "testdata/clusters.json", got)
}
func Test_buildPolicyTransportSocket(t *testing.T) {
ctx := context.Background()
cacheDir, _ := os.UserCacheDir()

View file

@ -0,0 +1,274 @@
[
{
"loadAssignment": {
"clusterName": "pomerium-acme-tls-alpn",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "127.0.0.1",
"portValue": 0
}
}
}
}
]
}
]
},
"name": "pomerium-acme-tls-alpn"
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-grpc",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-grpc",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-http",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-http",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-http",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"httpProtocolOptions": {
"headerKeyFormat": {
"statefulFormatter": {
"name": "preserve_case",
"typedConfig": {
"@type": "type.googleapis.com/envoy.extensions.http.header_formatters.preserve_case.v3.PreserveCaseFormatterConfig"
}
}
}
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-control-plane-metrics",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-metrics",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-control-plane-metrics",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"httpProtocolOptions": {
"headerKeyFormat": {
"statefulFormatter": {
"name": "preserve_case",
"typedConfig": {
"@type": "type.googleapis.com/envoy.extensions.http.header_formatters.preserve_case.v3.PreserveCaseFormatterConfig"
}
}
}
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-authorize",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-authorize",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {}
}
},
{
"connectTimeout": "10s",
"dnsLookupFamily": "V4_PREFERRED",
"loadAssignment": {
"clusterName": "pomerium-databroker",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"socketAddress": {
"address": "local-grpc",
"portValue": 80
}
}
},
"loadBalancingWeight": 1
}
]
}
]
},
"name": "pomerium-databroker",
"perConnectionBufferLimitBytes": 32768,
"respectDnsTtl": true,
"type": "STRICT_DNS",
"typedExtensionProtocolOptions": {
"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": {
"@type": "type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions",
"explicitHttpConfig": {
"http2ProtocolOptions": {
"allowConnect": true,
"initialConnectionWindowSize": 1048576,
"initialStreamWindowSize": 65536,
"maxConcurrentStreams": 100
}
}
}
},
"upstreamConnectionOptions": {
"tcpKeepalive": {}
}
},
{
"connectTimeout": "10s",
"loadAssignment": {
"clusterName": "pomerium-envoy-admin",
"endpoints": [
{
"lbEndpoints": [
{
"endpoint": {
"address": {
"pipe": {
"path": "/tmp/pomerium-envoy-admin.sock"
}
}
}
}
]
}
]
},
"name": "pomerium-envoy-admin"
}
]

View file

@ -3,6 +3,7 @@ package testutil
import (
"encoding/json"
"flag"
"os"
"path/filepath"
"reflect"
@ -11,6 +12,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/testing/protocmp"
@ -29,6 +31,11 @@ func AssertProtoEqual(t *testing.T, expected, actual interface{}, msgAndArgs ...
// of protobuf messages.
func AssertProtoJSONEqual(t *testing.T, expected string, protoMsg interface{}, msgAndArgs ...interface{}) bool {
	t.Helper()

	// Render the actual message(s) first, then normalize the expected JSON
	// through reformatJSON before the two strings are compared.
	got := formattedProtoJSON(protoMsg)
	want := reformatJSON(json.RawMessage(expected))
	return assert.Equal(t, want, got, msgAndArgs...)
}
func formattedProtoJSON(protoMsg interface{}) string {
protoMsgVal := reflect.ValueOf(protoMsg)
if protoMsgVal.Kind() == reflect.Slice {
var protoMsgs []json.RawMessage
@ -36,10 +43,9 @@ func AssertProtoJSONEqual(t *testing.T, expected string, protoMsg interface{}, m
protoMsgs = append(protoMsgs, toProtoJSON(protoMsgVal.Index(i).Interface()))
}
bs, _ := json.Marshal(protoMsgs)
return assert.Equal(t, reformatJSON(json.RawMessage(expected)), reformatJSON(bs), msgAndArgs...)
return reformatJSON(bs)
}
return assert.Equal(t, reformatJSON(json.RawMessage(expected)), reformatJSON(toProtoJSON(protoMsg)), msgAndArgs...)
return reformatJSON(toProtoJSON(protoMsg))
}
func reformatJSON(raw json.RawMessage) string {
@ -54,6 +60,31 @@ func toProtoJSON(protoMsg interface{}) json.RawMessage {
return bs
}
var updateFlag = flag.Bool("update", false,
"when enabled, reference files will be updated to match current behavior")
// AssertProtoJSONFileEqual asserts that a protobuf message (or slice of
// messages) matches the given reference JSON file.
//
// To update a reference JSON file, pass the test argument '-update'. This will
// overwrite the reference output to match the current behavior. In that mode
// no comparison is performed: the return value only reports whether the
// reference file could be written.
//
// Any msgAndArgs are attached to every failure reported by this helper,
// including failures to read or write the reference file.
func AssertProtoJSONFileEqual(
	t *testing.T, file string, protoMsg interface{}, msgAndArgs ...interface{},
) bool {
	t.Helper()

	if *updateFlag {
		// The file is written as the formatted JSON followed by a single
		// trailing newline.
		updatedJSON := formattedProtoJSON(protoMsg) + "\n"
		err := os.WriteFile(file, []byte(updatedJSON), 0o644)
		return assert.NoError(t, err, msgAndArgs...)
	}

	expected, err := os.ReadFile(file)
	// Without the reference file there is nothing to compare against, so a
	// read failure stops the test immediately rather than merely failing it.
	require.NoError(t, err, msgAndArgs...)
	return AssertProtoJSONEqual(t, string(expected), protoMsg, msgAndArgs...)
}
// ModRoot returns the directory containing the go.mod file.
func ModRoot() string {
dir, err := os.Getwd()