From 7a6d7c5a3c48a25c5ebbe2c84224ed62b58fcd10 Mon Sep 17 00:00:00 2001 From: Caleb Doxsey Date: Mon, 19 May 2025 10:52:15 -0600 Subject: [PATCH] config: use stable route ids for authorize matching and order xds responses (#5618) ## Summary Update the `RouteID` to use the `policy.ID` if it is set. This makes it so that updated routes use a stable identifier between updates so if the envoy control plane is updated before the authorize service's internal definitions (or vice-versa) the authorize service will still be able to match the route. The current behavior results in a 404 if envoy passes the old route id. The new behavior will result in inconsistency, but it should be quickly remedied. To help with debugging 4 new fields were added to the authorize check log. The `route-id` and `route-checksum` as the authorize sees it and the `envoy-route-id` and `envoy-route-checksum` as envoy sees it. I also updated the way we send updates to envoy to try and model their recommended approach: > In general, to avoid traffic drop, sequencing of updates should follow a make before break model, wherein: > > - CDS updates (if any) must always be pushed first. > - EDS updates (if any) must arrive after CDS updates for the respective clusters. > - LDS updates must arrive after corresponding CDS/EDS updates. > - RDS updates related to the newly added listeners must arrive after CDS/EDS/LDS updates. > - VHDS updates (if any) related to the newly added RouteConfigurations must arrive after RDS updates. > - Stale CDS clusters and related EDS endpoints (ones no longer being referenced) can then be removed. This should help avoid 404s when configuration is being updated. ## Related issues - [ENG-2386](https://linear.app/pomerium/issue/ENG-2386/large-number-of-routes-leads-to-404s-and-slowness) ## Checklist - [x] reference any related issues - [x] updated unit tests - [x] add appropriate label (`enhancement`, `bug`, `breaking`, `dependencies`, `ci`) - [x] ready for review --- authorize/evaluator/evaluator.go | 22 ++--- authorize/grpc.go | 10 ++- authorize/grpc_test.go | 8 -- authorize/log.go | 14 ++++ authorize/log_test.go | 10 +++ config/envoyconfig/per_filter_config.go | 21 +++-- .../envoyconfig/route_configurations_test.go | 6 +- config/envoyconfig/routes.go | 10 ++- config/envoyconfig/routes_test.go | 83 +++++++++++++------ config/policy.go | 18 +++- config/policy_test.go | 4 +- internal/controlplane/xdsmgr/xdsmgr.go | 56 +++++++++++++ internal/controlplane/xdsmgr/xdsmgr_test.go | 57 +++++++++++++ internal/databroker/config_source.go | 2 +- internal/httputil/reproxy/reproxy.go | 27 +++--- internal/log/authorize.go | 12 +++ proxy/portal/portal.go | 3 +- 17 files changed, 278 insertions(+), 85 deletions(-) diff --git a/authorize/evaluator/evaluator.go b/authorize/evaluator/evaluator.go index 3e33c9009..22997cafc 100644 --- a/authorize/evaluator/evaluator.go +++ b/authorize/evaluator/evaluator.go @@ -32,10 +32,12 @@ import ( // Request contains the inputs needed for evaluation. type Request struct { - IsInternal bool - Policy *config.Policy - HTTP RequestHTTP - Session RequestSession + IsInternal bool + Policy *config.Policy + HTTP RequestHTTP + Session RequestSession + EnvoyRouteChecksum uint64 + EnvoyRouteID string } // RequestHTTP is the HTTP field in the request. @@ -141,7 +143,7 @@ type Result struct { // An Evaluator evaluates policies. type Evaluator struct { store *store.Store - policyEvaluators map[uint64]*PolicyEvaluator + policyEvaluators map[string]*PolicyEvaluator headersEvaluators *HeadersEvaluator clientCA []byte clientCRL []byte @@ -172,7 +174,7 @@ func New( // If there is a previous Evaluator constructed from the same settings, we // can reuse the HeadersEvaluator along with any PolicyEvaluators for // unchanged policies. - var cachedPolicyEvaluators map[uint64]*PolicyEvaluator + var cachedPolicyEvaluators map[string]*PolicyEvaluator if previous != nil && previous.cfgCacheKey == e.cfgCacheKey { e.headersEvaluators = previous.headersEvaluators cachedPolicyEvaluators = previous.policyEvaluators @@ -188,18 +190,18 @@ func New( } type routeEvaluator struct { - id uint64 + id string evaluator *PolicyEvaluator } func getOrCreatePolicyEvaluators( ctx context.Context, cfg *evaluatorConfig, store *store.Store, - cachedPolicyEvaluators map[uint64]*PolicyEvaluator, -) (map[uint64]*PolicyEvaluator, error) { + cachedPolicyEvaluators map[string]*PolicyEvaluator, +) (map[string]*PolicyEvaluator, error) { now := time.Now() var reusedCount int - m := make(map[uint64]*PolicyEvaluator) + m := make(map[string]*PolicyEvaluator) var builders []errgrouputil.BuilderFunc[routeEvaluator] for i := range cfg.Policies { configPolicy := cfg.Policies[i] diff --git a/authorize/grpc.go b/authorize/grpc.go index 43c80f6f0..fc8265aee 100644 --- a/authorize/grpc.go +++ b/authorize/grpc.go @@ -182,14 +182,16 @@ func (a *Authorize) getEvaluatorRequestFromCheckRequest( ) (*evaluator.Request, error) { attrs := in.GetAttributes() req := &evaluator.Request{ - IsInternal: envoyconfig.ExtAuthzContextExtensionsIsInternal(attrs.GetContextExtensions()), - HTTP: evaluator.RequestHTTPFromCheckRequest(ctx, in), + IsInternal: envoyconfig.ExtAuthzContextExtensionsIsInternal(attrs.GetContextExtensions()), + HTTP: evaluator.RequestHTTPFromCheckRequest(ctx, in), + EnvoyRouteChecksum: envoyconfig.ExtAuthzContextExtensionsRouteChecksum(attrs.GetContextExtensions()), + EnvoyRouteID: envoyconfig.ExtAuthzContextExtensionsRouteID(attrs.GetContextExtensions()), } - req.Policy = a.getMatchingPolicy(envoyconfig.ExtAuthzContextExtensionsRouteID(attrs.GetContextExtensions())) + req.Policy = a.getMatchingPolicy(req.EnvoyRouteID) return req, nil } -func (a *Authorize) getMatchingPolicy(routeID uint64) *config.Policy { +func (a *Authorize) getMatchingPolicy(routeID string) *config.Policy { options := a.currentConfig.Load().Options for p := range options.GetAllPolicies() { diff --git a/authorize/grpc_test.go b/authorize/grpc_test.go index 561dff74a..21faee252 100644 --- a/authorize/grpc_test.go +++ b/authorize/grpc_test.go @@ -213,11 +213,3 @@ func (m mockDataBrokerServiceClient) Patch(ctx context.Context, in *databroker.P Records: putResponse.GetRecords(), }, nil } - -func mustParseURL(rawURL string) url.URL { - u, err := url.Parse(rawURL) - if err != nil { - panic(err) - } - return *u -} diff --git a/authorize/log.go b/authorize/log.go index b8c44d68e..2e1c939cc 100644 --- a/authorize/log.go +++ b/authorize/log.go @@ -143,6 +143,10 @@ func populateLogEvent( return evt.Str(string(field), req.HTTP.Headers["X-Request-Id"]) case log.AuthorizeLogFieldEmail: return evt.Str(string(field), u.GetEmail()) + case log.AuthorizeLogFieldEnvoyRouteChecksum: + return evt.Uint64(string(field), req.EnvoyRouteChecksum) + case log.AuthorizeLogFieldEnvoyRouteID: + return evt.Str(string(field), req.EnvoyRouteID) case log.AuthorizeLogFieldHost: return evt.Str(string(field), req.HTTP.Host) case log.AuthorizeLogFieldIDToken: @@ -184,6 +188,16 @@ func populateLogEvent( return evt.Str(string(field), req.HTTP.RawQuery) case log.AuthorizeLogFieldRequestID: return evt.Str(string(field), requestid.FromContext(ctx)) + case log.AuthorizeLogFieldRouteChecksum: + if req.Policy != nil { + return evt.Uint64(string(field), req.Policy.Checksum()) + } + return evt + case log.AuthorizeLogFieldRouteID: + if req.Policy != nil { + return evt.Str(string(field), req.Policy.ID) + } + return evt case log.AuthorizeLogFieldServiceAccountID: if sa, ok := s.(*user.ServiceAccount); ok { evt = evt.Str(string(field), sa.GetId()) diff --git a/authorize/log_test.go b/authorize/log_test.go index 27105563d..6a00514b3 100644 --- a/authorize/log_test.go +++ b/authorize/log_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/pomerium/pomerium/authorize/evaluator" + "github.com/pomerium/pomerium/config" "github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/pkg/grpc/session" "github.com/pomerium/pomerium/pkg/grpc/user" @@ -31,6 +32,11 @@ func Test_populateLogEvent(t *testing.T) { Headers: map[string]string{"X-Request-Id": "CHECK-REQUEST-ID"}, IP: "127.0.0.1", }, + EnvoyRouteChecksum: 1234, + EnvoyRouteID: "ROUTE-ID", + Policy: &config.Policy{ + ID: "POLICY-ID", + }, } s := &session.Session{ Id: "SESSION-ID", @@ -65,6 +71,8 @@ func Test_populateLogEvent(t *testing.T) { }{ {log.AuthorizeLogFieldCheckRequestID, s, `{"check-request-id":"CHECK-REQUEST-ID"}`}, {log.AuthorizeLogFieldEmail, s, `{"email":"EMAIL"}`}, + {log.AuthorizeLogFieldEnvoyRouteChecksum, s, `{"envoy-route-checksum":1234}`}, + {log.AuthorizeLogFieldEnvoyRouteID, s, `{"envoy-route-id":"ROUTE-ID"}`}, {log.AuthorizeLogFieldHost, s, `{"host":"HOST"}`}, {log.AuthorizeLogFieldIDToken, s, `{"id-token":"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJPbmxpbmUgSldUIEJ1aWxkZXIiLCJpYXQiOjE2OTAzMTU4NjIsImV4cCI6MTcyMTg1MTg2MiwiYXVkIjoid3d3LmV4YW1wbGUuY29tIiwic3ViIjoianJvY2tldEBleGFtcGxlLmNvbSIsIkdpdmVuTmFtZSI6IkpvaG5ueSIsIlN1cm5hbWUiOiJSb2NrZXQiLCJFbWFpbCI6Impyb2NrZXRAZXhhbXBsZS5jb20iLCJSb2xlIjpbIk1hbmFnZXIiLCJQcm9qZWN0IEFkbWluaXN0cmF0b3IiXX0.AAojgaG0fjMFwMCAC6YALHHMFIZEedFSP_vMGhiHhso"}`}, {log.AuthorizeLogFieldIDTokenClaims, s, `{"id-token-claims":{"Email":"jrocket@example.com","GivenName":"Johnny","Role":["Manager","Project Administrator"],"Surname":"Rocket","aud":"www.example.com","exp":1721851862,"iat":1690315862,"iss":"Online JWT Builder","sub":"jrocket@example.com"}}`}, @@ -77,6 +85,8 @@ func Test_populateLogEvent(t *testing.T) { {log.AuthorizeLogFieldQuery, s, `{"query":"a=b"}`}, {log.AuthorizeLogFieldRemovedGroupsCount, s, `{"removed-groups-count":42}`}, {log.AuthorizeLogFieldRequestID, s, `{"request-id":"REQUEST-ID"}`}, + {log.AuthorizeLogFieldRouteChecksum, s, `{"route-checksum":14294378844626301341}`}, + {log.AuthorizeLogFieldRouteID, s, `{"route-id":"POLICY-ID"}`}, {log.AuthorizeLogFieldServiceAccountID, sa, `{"service-account-id":"SERVICE-ACCOUNT-ID"}`}, {log.AuthorizeLogFieldSessionID, s, `{"session-id":"SESSION-ID"}`}, {log.AuthorizeLogFieldUser, s, `{"user":"USER-ID"}`}, diff --git a/config/envoyconfig/per_filter_config.go b/config/envoyconfig/per_filter_config.go index 3907b3d78..16ffd1989 100644 --- a/config/envoyconfig/per_filter_config.go +++ b/config/envoyconfig/per_filter_config.go @@ -31,10 +31,11 @@ func PerFilterConfigExtAuthzDisabled() *anypb.Any { } // MakeExtAuthzContextExtensions makes the ext authz context extensions. -func MakeExtAuthzContextExtensions(internal bool, routeID uint64) map[string]string { +func MakeExtAuthzContextExtensions(internal bool, routeID string, routeChecksum uint64) map[string]string { return map[string]string{ - "internal": strconv.FormatBool(internal), - "route_id": strconv.FormatUint(routeID, 10), + "internal": strconv.FormatBool(internal), + "route_id": routeID, + "route_checksum": strconv.FormatUint(routeChecksum, 10), } } @@ -44,10 +45,18 @@ func ExtAuthzContextExtensionsIsInternal(extAuthzContextExtensions map[string]st } // ExtAuthzContextExtensionsRouteID returns the route id for the context extensions. -func ExtAuthzContextExtensionsRouteID(extAuthzContextExtensions map[string]string) uint64 { +func ExtAuthzContextExtensionsRouteID(extAuthzContextExtensions map[string]string) string { + if extAuthzContextExtensions == nil { + return "" + } + return extAuthzContextExtensions["route_id"] +} + +// ExtAuthzContextExtensionsRouteChecksum returns the route checksum for the context extensions. +func ExtAuthzContextExtensionsRouteChecksum(extAuthzContextExtensions map[string]string) uint64 { if extAuthzContextExtensions == nil { return 0 } - routeID, _ := strconv.ParseUint(extAuthzContextExtensions["route_id"], 10, 64) - return routeID + v, _ := strconv.ParseUint(extAuthzContextExtensions["route_checksum"], 10, 64) + return v } diff --git a/config/envoyconfig/route_configurations_test.go b/config/envoyconfig/route_configurations_test.go index 04afa06ca..501cb82d8 100644 --- a/config/envoyconfig/route_configurations_test.go +++ b/config/envoyconfig/route_configurations_test.go @@ -100,7 +100,8 @@ func TestBuilder_buildMainRouteConfiguration(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "6898812972967355380" + "route_checksum": "4844561823473827050", + "route_id": "5fbd81d8f19363f4" } } } @@ -157,7 +158,8 @@ func TestBuilder_buildMainRouteConfiguration(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "6898812972967355380" + "route_checksum": "4844561823473827050", + "route_id": "5fbd81d8f19363f4" } } } diff --git a/config/envoyconfig/routes.go b/config/envoyconfig/routes.go index 52605fcf8..d0bacbab0 100644 --- a/config/envoyconfig/routes.go +++ b/config/envoyconfig/routes.go @@ -133,7 +133,7 @@ func (b *Builder) buildControlPlanePathRoute( }, ResponseHeadersToAdd: toEnvoyHeaders(options.GetSetResponseHeaders()), TypedPerFilterConfig: map[string]*anypb.Any{ - PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(true, 0)), + PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(true, "", 0)), }, } return r @@ -160,7 +160,7 @@ func (b *Builder) buildControlPlanePrefixRoute( }, ResponseHeadersToAdd: toEnvoyHeaders(options.GetSetResponseHeaders()), TypedPerFilterConfig: map[string]*anypb.Any{ - PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(true, 0)), + PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(true, "", 0)), }, } return r @@ -174,7 +174,7 @@ var getClusterID = func(policy *config.Policy) string { } id, _ := policy.RouteID() - return fmt.Sprintf("%s-%x", prefix, id) + return fmt.Sprintf("%s-%s", prefix, id) } // getClusterStatsName returns human readable name that would be used by envoy to emit statistics, available as envoy_cluster_name label @@ -281,6 +281,8 @@ func (b *Builder) buildRouteForPolicyAndMatch( return nil, err } + routeChecksum := policy.Checksum() + route := &envoy_config_route_v3.Route{ Name: name, Match: match, @@ -324,7 +326,7 @@ func (b *Builder) buildRouteForPolicyAndMatch( } } else { route.TypedPerFilterConfig = map[string]*anypb.Any{ - PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(false, routeID)), + PerFilterConfigExtAuthzName: PerFilterConfigExtAuthzContextExtensions(MakeExtAuthzContextExtensions(false, routeID, routeChecksum)), } luaMetadata["remove_pomerium_cookie"] = &structpb.Value{ Kind: &structpb.Value_StringValue{ diff --git a/config/envoyconfig/routes_test.go b/config/envoyconfig/routes_test.go index 75a6f968d..7082dbdd7 100644 --- a/config/envoyconfig/routes_test.go +++ b/config/envoyconfig/routes_test.go @@ -90,7 +90,8 @@ func Test_buildPomeriumHTTPRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "true", - "route_id": "0" + "route_checksum": "0", + "route_id": "" } } } @@ -169,7 +170,8 @@ func Test_buildControlPlanePathRoute(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "true", - "route_id": "0" + "route_checksum": "0", + "route_id": "" } } } @@ -216,7 +218,8 @@ func Test_buildControlPlanePrefixRoute(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "true", - "route_id": "0" + "route_checksum": "0", + "route_id": "" } } } @@ -392,14 +395,24 @@ func Test_buildPolicyRoutes(t *testing.T) { }, } routeIDs := []string{ - 1: "13553029590470792156", - 2: "7129118097581932399", - 3: "11039710722247768205", - 4: "658592019741814826", - 5: "11039710722247768205", // same as 3 - 6: "7129118097581932399", // same as 2 - 7: "7129118097581932399", // same as 2 - 8: "3463414089682043373", + 1: "bc16089b025e93dc", + 2: "62efb723582dff6f", + 3: "9934ee6936a6388d", + 4: "0923c9f3dc9e302a", + 5: "9934ee6936a6388d", // same as 3 + 6: "62efb723582dff6f", // same as 2 + 7: "62efb723582dff6f", // same as 2 + 8: "301084c3bd94c1ed", + } + routeChecksums := []string{ + 1: "1550825365439203068", + 2: "1743754064510868859", + 3: "5973469357905660470", + 4: "10629393024495644652", + 5: "17050190873730880526", + 6: "4829848755825381466", + 7: "7941222915300424536", + 8: "8084661313119959810", } b := &Builder{filemgr: filemgr.NewManager(), reproxy: reproxy.New()} @@ -481,6 +494,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[1]+`", "route_id": "`+routeIDs[1]+`" } } @@ -556,6 +570,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[2]+`", "route_id": "`+routeIDs[2]+`" } } @@ -630,6 +645,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[3]+`", "route_id": "`+routeIDs[3]+`" } } @@ -706,6 +722,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[4]+`", "route_id": "`+routeIDs[4]+`" } } @@ -781,6 +798,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[5]+`", "route_id": "`+routeIDs[5]+`" } } @@ -855,6 +873,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[6]+`", "route_id": "`+routeIDs[6]+`" } } @@ -930,6 +949,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[7]+`", "route_id": "`+routeIDs[7]+`" } } @@ -1005,6 +1025,7 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", + "route_checksum": "`+routeChecksums[8]+`", "route_id": "`+routeIDs[8]+`" } } @@ -1196,7 +1217,8 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "11022856234610764131" + "route_checksum": "16194904053254305772", + "route_id": "98f90d58022ca963" } } } @@ -1272,7 +1294,8 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "9302002763161476568" + "route_checksum": "10734663134999137402", + "route_id": "81175a3a9df11dd8" } } } @@ -1369,7 +1392,8 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "12468817303959353203" + "route_checksum": "6431934433938620139", + "route_id": "ad0a23467bbdb773" } } } @@ -1471,7 +1495,8 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "1158488049891246013" + "route_checksum": "185312241489123079", + "route_id": "1013c6be524d7fbd" } } } @@ -1547,14 +1572,14 @@ func Test_buildPolicyRoutes(t *testing.T) { "appendAction": "OVERWRITE_IF_EXISTS_OR_ADD", "header": { "key": "x-pomerium-reproxy-policy", - "value": "12114237825990386381" + "value": "a81e6b1e66c1e2cd" } }, { "appendAction": "OVERWRITE_IF_EXISTS_OR_ADD", "header": { "key": "x-pomerium-reproxy-policy-hmac", - "value": "pe3ai+2H8rHB5zgHi8+ryY6VDcuZZ5pf9Rfkrw0NdBE=" + "value": "0EisedpElEUeBI5OPTVcMtza+Yyju2lsbSoBya2jBJ0=" } } ], @@ -1586,7 +1611,8 @@ func Test_buildPolicyRoutes(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "12114237825990386381" + "route_checksum": "10135716377738705841", + "route_id": "a81e6b1e66c1e2cd" } } } @@ -1720,7 +1746,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "14883526649905267120", + "route_id": "4d5ee69fcc359f45" } } } @@ -1795,7 +1822,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "10046758419081543299", + "route_id": "4d5ee69fcc359f45" } } } @@ -1875,7 +1903,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "2754443979682419848", + "route_id": "4d5ee69fcc359f45" } } } @@ -1950,7 +1979,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "1546259218106347577", + "route_id": "4d5ee69fcc359f45" } } } @@ -2025,7 +2055,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "8023363204239692999", + "route_id": "4d5ee69fcc359f45" } } } @@ -2105,7 +2136,8 @@ func Test_buildPolicyRoutesRewrite(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "false", - "route_id": "5575146962731507525" + "route_checksum": "5173412791633652167", + "route_id": "4d5ee69fcc359f45" } } } @@ -2280,7 +2312,8 @@ func Test_buildPomeriumHTTPRoutesWithMCP(t *testing.T) { "checkSettings": { "contextExtensions": { "internal": "true", - "route_id": "0" + "route_checksum": "0", + "route_id": "" } } } diff --git a/config/policy.go b/config/policy.go index 613a2dc09..5a5d0193a 100644 --- a/config/policy.go +++ b/config/policy.go @@ -3,6 +3,7 @@ package config import ( "crypto/tls" "encoding/base64" + "encoding/hex" "fmt" "net/http" "net/url" @@ -761,7 +762,15 @@ func (p *Policy) Checksum() uint64 { // - path // - regex // - to/redirect/response (whichever is set) -func (p *Policy) RouteID() (uint64, error) { +func (p *Policy) RouteID() (string, error) { + if p.ID != "" { + return p.ID, nil + } + + return p.generateRouteID() +} + +func (p *Policy) generateRouteID() (string, error) { // this function is in the hot path, try not to allocate too much memory here hash := hashutil.NewDigest() hash.WriteStringWithLen(p.From) @@ -807,12 +816,13 @@ func (p *Policy) RouteID() (uint64, error) { hash.WriteInt32(int32(p.Response.Status)) hash.WriteStringWithLen(p.Response.Body) default: - return 0, errEitherToOrRedirectOrResponseRequired + return "", errEitherToOrRedirectOrResponseRequired } - return hash.Sum64(), nil + + return hex.EncodeToString(hash.Sum(nil)), nil } -func (p *Policy) MustRouteID() uint64 { +func (p *Policy) MustRouteID() string { id, err := p.RouteID() if err != nil { panic(err) diff --git a/config/policy_test.go b/config/policy_test.go index 025fbb9d0..384a68e73 100644 --- a/config/policy_test.go +++ b/config/policy_test.go @@ -489,7 +489,7 @@ func TestRouteID(t *testing.T) { } t.Run("random policies", func(t *testing.T) { - hashes := make(map[uint64]struct{}, 10000) + hashes := make(map[string]struct{}, 10000) for i := 0; i < 10000; i++ { p := Policy{} for _, m := range baseFieldMutators { @@ -522,7 +522,7 @@ func TestRouteID(t *testing.T) { assert.Len(t, hashes, 10000) }) t.Run("incremental policy", func(t *testing.T) { - hashes := make(map[uint64]Policy, 5000) + hashes := make(map[string]Policy, 5000) p := Policy{} diff --git a/internal/controlplane/xdsmgr/xdsmgr.go b/internal/controlplane/xdsmgr/xdsmgr.go index b402ac2d9..283154140 100644 --- a/internal/controlplane/xdsmgr/xdsmgr.go +++ b/internal/controlplane/xdsmgr/xdsmgr.go @@ -2,7 +2,9 @@ package xdsmgr import ( + "cmp" "context" + "slices" "sync" envoy_service_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" @@ -10,6 +12,7 @@ import ( "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" "github.com/pomerium/pomerium/internal/log" "github.com/pomerium/pomerium/internal/signal" @@ -186,12 +189,17 @@ func (mgr *Manager) DeltaAggregatedResources( mgr.mu.Unlock() } + var responses []*envoy_service_discovery_v3.DeltaDiscoveryResponse for _, typeURL := range typeURLs { res := getDeltaResponse(changeCtx, typeURL) if res == nil { continue } + responses = append(responses, res) + } + responses = buildDiscoveryResponsesForConsistentUpdates(responses) + for _, res := range responses { select { case <-ctx.Done(): return context.Cause(ctx) @@ -242,3 +250,51 @@ func (mgr *Manager) Update(ctx context.Context, resources map[string][]*envoy_se mgr.signal.Broadcast(ctx) } + +func buildDiscoveryResponsesForConsistentUpdates(in []*envoy_service_discovery_v3.DeltaDiscoveryResponse) (out []*envoy_service_discovery_v3.DeltaDiscoveryResponse) { + var updates, removals []*envoy_service_discovery_v3.DeltaDiscoveryResponse + for _, r := range in { + if len(r.Resources) > 0 { + rr := proto.Clone(r).(*envoy_service_discovery_v3.DeltaDiscoveryResponse) + rr.RemovedResources = nil + updates = append(updates, rr) + } + if len(r.RemovedResources) > 0 { + rr := proto.Clone(r).(*envoy_service_discovery_v3.DeltaDiscoveryResponse) + rr.Resources = nil + removals = append(removals, rr) + } + } + + // from the docs: + // + // In general, to avoid traffic drop, sequencing of updates should follow a make before break model, wherein: + // + // CDS updates (if any) must always be pushed first. + // EDS updates (if any) must arrive after CDS updates for the respective clusters. + // LDS updates must arrive after corresponding CDS/EDS updates. + // RDS updates related to the newly added listeners must arrive after CDS/EDS/LDS updates. + // VHDS updates (if any) related to the newly added RouteConfigurations must arrive after RDS updates. + // Stale CDS clusters and related EDS endpoints (ones no longer being referenced) can then be removed. + + updateOrder := map[string]int{ + clusterTypeURL: 1, + listenerTypeURL: 2, + routeConfigurationTypeURL: 3, + } + slices.SortFunc(updates, func(a, b *envoy_service_discovery_v3.DeltaDiscoveryResponse) int { + return cmp.Compare(updateOrder[a.TypeUrl], updateOrder[b.TypeUrl]) + }) + + removeOrder := map[string]int{ + routeConfigurationTypeURL: 1, + listenerTypeURL: 2, + clusterTypeURL: 3, + } + slices.SortFunc(removals, func(a, b *envoy_service_discovery_v3.DeltaDiscoveryResponse) int { + return cmp.Compare(removeOrder[a.TypeUrl], removeOrder[b.TypeUrl]) + }) + + out = append(updates, removals...) + return out +} diff --git a/internal/controlplane/xdsmgr/xdsmgr_test.go b/internal/controlplane/xdsmgr/xdsmgr_test.go index 3e2504771..a3d2708f5 100644 --- a/internal/controlplane/xdsmgr/xdsmgr_test.go +++ b/internal/controlplane/xdsmgr/xdsmgr_test.go @@ -6,6 +6,9 @@ import ( "testing" "time" + envoy_config_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" + envoy_config_listener_v3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" + envoy_config_route_v3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" envoy_service_discovery_v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -14,6 +17,8 @@ import ( "google.golang.org/grpc/test/bufconn" "github.com/pomerium/pomerium/internal/signal" + "github.com/pomerium/pomerium/internal/testutil" + "github.com/pomerium/pomerium/pkg/protoutil" ) const bufSize = 1024 * 1024 @@ -117,3 +122,55 @@ func TestManager(t *testing.T) { }, time.Second*5, time.Millisecond) }) } + +func TestBuildDiscoveryResponsesForConsistentUpdates(t *testing.T) { + t.Parallel() + + rc1 := protoutil.NewAny(&envoy_config_route_v3.RouteConfiguration{}) + l1 := protoutil.NewAny(&envoy_config_listener_v3.Listener{}) + c1 := protoutil.NewAny(&envoy_config_cluster_v3.Cluster{}) + + responses := buildDiscoveryResponsesForConsistentUpdates([]*envoy_service_discovery_v3.DeltaDiscoveryResponse{ + { + TypeUrl: routeConfigurationTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "rc1", Resource: rc1}}, + RemovedResources: []string{"rc2"}, + }, + { + TypeUrl: listenerTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "l1", Resource: l1}}, + RemovedResources: []string{"l2"}, + }, + { + TypeUrl: clusterTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "c1", Resource: c1}}, + RemovedResources: []string{"c2"}, + }, + }) + testutil.AssertProtoEqual(t, []*envoy_service_discovery_v3.DeltaDiscoveryResponse{ + { + TypeUrl: clusterTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "c1", Resource: c1}}, + }, + { + TypeUrl: listenerTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "l1", Resource: l1}}, + }, + { + TypeUrl: routeConfigurationTypeURL, + Resources: []*envoy_service_discovery_v3.Resource{{Name: "rc1", Resource: rc1}}, + }, + { + TypeUrl: routeConfigurationTypeURL, + RemovedResources: []string{"rc2"}, + }, + { + TypeUrl: listenerTypeURL, + RemovedResources: []string{"l2"}, + }, + { + TypeUrl: clusterTypeURL, + RemovedResources: []string{"c2"}, + }, + }, responses) +} diff --git a/internal/databroker/config_source.go b/internal/databroker/config_source.go index 802a11495..62a4f91ff 100644 --- a/internal/databroker/config_source.go +++ b/internal/databroker/config_source.go @@ -204,7 +204,7 @@ func (src *ConfigSource) buildPolicyFromProto(_ context.Context, routepb *config } func (src *ConfigSource) addPolicies(ctx context.Context, cfg *config.Config, policies []*config.Policy) { - seen := make(map[uint64]struct{}, len(policies)+cfg.Options.NumPolicies()) + seen := make(map[string]struct{}, len(policies)+cfg.Options.NumPolicies()) for policy := range cfg.Options.GetAllPolicies() { id, err := policy.RouteID() if err != nil { diff --git a/internal/httputil/reproxy/reproxy.go b/internal/httputil/reproxy/reproxy.go index b2e696d75..a1065a419 100644 --- a/internal/httputil/reproxy/reproxy.go +++ b/internal/httputil/reproxy/reproxy.go @@ -10,7 +10,6 @@ import ( "net/http" stdhttputil "net/http/httputil" "net/url" - "strconv" "strings" "sync" @@ -29,46 +28,40 @@ type Handler struct { mu sync.RWMutex key []byte options *config.Options - policies map[uint64]*config.Policy + policies map[string]*config.Policy } // New creates a new Handler. func New() *Handler { h := new(Handler) - h.policies = make(map[uint64]*config.Policy) + h.policies = make(map[string]*config.Policy) return h } // GetPolicyIDFromHeaders gets a policy id from http headers. If no policy id is found // or the HMAC isn't valid, false will be returned. -func (h *Handler) GetPolicyIDFromHeaders(headers http.Header) (uint64, bool) { - policyStr := headers.Get(httputil.HeaderPomeriumReproxyPolicy) +func (h *Handler) GetPolicyIDFromHeaders(headers http.Header) (string, bool) { + policyID := headers.Get(httputil.HeaderPomeriumReproxyPolicy) hmacStr := headers.Get(httputil.HeaderPomeriumReproxyPolicyHMAC) hmac, err := base64.StdEncoding.DecodeString(hmacStr) if err != nil { - return 0, false - } - - policyID, err := strconv.ParseUint(policyStr, 10, 64) - if err != nil { - return 0, false + return "", false } h.mu.RLock() defer h.mu.RUnlock() - return policyID, cryptutil.CheckHMAC([]byte(policyStr), hmac, h.key) + return policyID, cryptutil.CheckHMAC([]byte(policyID), hmac, h.key) } // GetPolicyIDHeaders returns http headers for the given policy id. -func (h *Handler) GetPolicyIDHeaders(policyID uint64) [][2]string { +func (h *Handler) GetPolicyIDHeaders(policyID string) [][2]string { h.mu.RLock() defer h.mu.RUnlock() - s := strconv.FormatUint(policyID, 10) - hmac := base64.StdEncoding.EncodeToString(cryptutil.GenerateHMAC([]byte(s), h.key)) + hmac := base64.StdEncoding.EncodeToString(cryptutil.GenerateHMAC([]byte(policyID), h.key)) return [][2]string{ - {httputil.HeaderPomeriumReproxyPolicy, s}, + {httputil.HeaderPomeriumReproxyPolicy, policyID}, {httputil.HeaderPomeriumReproxyPolicyHMAC, hmac}, } } @@ -133,7 +126,7 @@ func (h *Handler) Update(ctx context.Context, cfg *config.Config) { h.key, _ = cfg.Options.GetSharedKey() h.options = cfg.Options - h.policies = make(map[uint64]*config.Policy, cfg.Options.NumPolicies()) + h.policies = make(map[string]*config.Policy, cfg.Options.NumPolicies()) for p := range cfg.Options.GetAllPolicies() { id, err := p.RouteID() if err != nil { diff --git a/internal/log/authorize.go b/internal/log/authorize.go index f2f0c07e2..5065af112 100644 --- a/internal/log/authorize.go +++ b/internal/log/authorize.go @@ -12,6 +12,8 @@ type AuthorizeLogField string const ( AuthorizeLogFieldCheckRequestID AuthorizeLogField = "check-request-id" AuthorizeLogFieldEmail AuthorizeLogField = "email" + AuthorizeLogFieldEnvoyRouteChecksum AuthorizeLogField = "envoy-route-checksum" + AuthorizeLogFieldEnvoyRouteID AuthorizeLogField = "envoy-route-id" AuthorizeLogFieldHeaders = AuthorizeLogField(headersFieldName) AuthorizeLogFieldHost AuthorizeLogField = "host" AuthorizeLogFieldIDToken AuthorizeLogField = "id-token" @@ -25,6 +27,8 @@ const ( AuthorizeLogFieldQuery AuthorizeLogField = "query" AuthorizeLogFieldRemovedGroupsCount AuthorizeLogField = "removed-groups-count" AuthorizeLogFieldRequestID AuthorizeLogField = "request-id" + AuthorizeLogFieldRouteChecksum AuthorizeLogField = "route-checksum" + AuthorizeLogFieldRouteID AuthorizeLogField = "route-id" AuthorizeLogFieldServiceAccountID AuthorizeLogField = "service-account-id" AuthorizeLogFieldSessionID AuthorizeLogField = "session-id" AuthorizeLogFieldUser AuthorizeLogField = "user" @@ -46,6 +50,10 @@ var DefaultAuthorizeLogFields = []AuthorizeLogField{ AuthorizeLogFieldServiceAccountID, AuthorizeLogFieldUser, AuthorizeLogFieldEmail, + AuthorizeLogFieldEnvoyRouteChecksum, + AuthorizeLogFieldEnvoyRouteID, + AuthorizeLogFieldRouteChecksum, + AuthorizeLogFieldRouteID, } // ErrUnknownAuthorizeLogField indicates that an authorize log field is unknown. @@ -54,6 +62,8 @@ var ErrUnknownAuthorizeLogField = errors.New("unknown authorize log field") var authorizeLogFieldLookup = map[AuthorizeLogField]struct{}{ AuthorizeLogFieldCheckRequestID: {}, AuthorizeLogFieldEmail: {}, + AuthorizeLogFieldEnvoyRouteChecksum: {}, + AuthorizeLogFieldEnvoyRouteID: {}, AuthorizeLogFieldHeaders: {}, AuthorizeLogFieldHost: {}, AuthorizeLogFieldIDToken: {}, @@ -67,6 +77,8 @@ var authorizeLogFieldLookup = map[AuthorizeLogField]struct{}{ AuthorizeLogFieldQuery: {}, AuthorizeLogFieldRemovedGroupsCount: {}, AuthorizeLogFieldRequestID: {}, + AuthorizeLogFieldRouteChecksum: {}, + AuthorizeLogFieldRouteID: {}, AuthorizeLogFieldServiceAccountID: {}, AuthorizeLogFieldSessionID: {}, AuthorizeLogFieldUser: {}, diff --git a/proxy/portal/portal.go b/proxy/portal/portal.go index f1b57e55b..8e964fafb 100644 --- a/proxy/portal/portal.go +++ b/proxy/portal/portal.go @@ -2,7 +2,6 @@ package portal import ( - "fmt" "strings" "github.com/pomerium/pomerium/config" @@ -28,7 +27,7 @@ func RoutesFromConfigRoutes(routes []*config.Policy) []Route { pr := Route{} pr.ID = route.ID if pr.ID == "" { - pr.ID = fmt.Sprintf("%x", route.MustRouteID()) + pr.ID = route.MustRouteID() } pr.Name = route.Name pr.From = route.From