config: use stable route ids for authorize matching and order xds responses (#5618)

## Summary
Update the `RouteID` to use the `policy.ID` if it is set. This makes it
so that updated routes use a stable identifier between updates so if the
envoy control plane is updated before the authorize service's internal
definitions (or vice-versa) the authorize service will still be able to
match the route.

The current behavior results in a 404 if envoy passes the old route id.
The new behavior will result in inconsistency, but it should be quickly
remedied. To help with debugging 4 new fields were added to the
authorize check log. The `route-id` and `route-checksum` as the
authorize sees it and the `envoy-route-id` and `envoy-route-checksum` as
envoy sees it.

I also updated the way we send updates to envoy to try and model their
recommended approach:

> In general, to avoid traffic drop, sequencing of updates should follow
a make before break model, wherein:
> 
> - CDS updates (if any) must always be pushed first.
> - EDS updates (if any) must arrive after CDS updates for the
respective clusters.
> - LDS updates must arrive after corresponding CDS/EDS updates.
> - RDS updates related to the newly added listeners must arrive after
CDS/EDS/LDS updates.
> - VHDS updates (if any) related to the newly added RouteConfigurations
must arrive after RDS updates.
> - Stale CDS clusters and related EDS endpoints (ones no longer being
referenced) can then be removed.

This should help avoid 404s when configuration is being updated.

## Related issues
-
[ENG-2386](https://linear.app/pomerium/issue/ENG-2386/large-number-of-routes-leads-to-404s-and-slowness)

## Checklist
- [x] reference any related issues
- [x] updated unit tests
- [x] add appropriate label (`enhancement`, `bug`, `breaking`,
`dependencies`, `ci`)
- [x] ready for review
This commit is contained in:
Caleb Doxsey 2025-05-19 10:52:15 -06:00 committed by GitHub
parent 2f179658b6
commit 7a6d7c5a3c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 278 additions and 85 deletions

View file

@ -10,6 +10,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/pomerium/pomerium/authorize/evaluator"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/pkg/grpc/session"
"github.com/pomerium/pomerium/pkg/grpc/user"
@ -31,6 +32,11 @@ func Test_populateLogEvent(t *testing.T) {
Headers: map[string]string{"X-Request-Id": "CHECK-REQUEST-ID"},
IP: "127.0.0.1",
},
EnvoyRouteChecksum: 1234,
EnvoyRouteID: "ROUTE-ID",
Policy: &config.Policy{
ID: "POLICY-ID",
},
}
s := &session.Session{
Id: "SESSION-ID",
@ -65,6 +71,8 @@ func Test_populateLogEvent(t *testing.T) {
}{
{log.AuthorizeLogFieldCheckRequestID, s, `{"check-request-id":"CHECK-REQUEST-ID"}`},
{log.AuthorizeLogFieldEmail, s, `{"email":"EMAIL"}`},
{log.AuthorizeLogFieldEnvoyRouteChecksum, s, `{"envoy-route-checksum":1234}`},
{log.AuthorizeLogFieldEnvoyRouteID, s, `{"envoy-route-id":"ROUTE-ID"}`},
{log.AuthorizeLogFieldHost, s, `{"host":"HOST"}`},
{log.AuthorizeLogFieldIDToken, s, `{"id-token":"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJPbmxpbmUgSldUIEJ1aWxkZXIiLCJpYXQiOjE2OTAzMTU4NjIsImV4cCI6MTcyMTg1MTg2MiwiYXVkIjoid3d3LmV4YW1wbGUuY29tIiwic3ViIjoianJvY2tldEBleGFtcGxlLmNvbSIsIkdpdmVuTmFtZSI6IkpvaG5ueSIsIlN1cm5hbWUiOiJSb2NrZXQiLCJFbWFpbCI6Impyb2NrZXRAZXhhbXBsZS5jb20iLCJSb2xlIjpbIk1hbmFnZXIiLCJQcm9qZWN0IEFkbWluaXN0cmF0b3IiXX0.AAojgaG0fjMFwMCAC6YALHHMFIZEedFSP_vMGhiHhso"}`},
{log.AuthorizeLogFieldIDTokenClaims, s, `{"id-token-claims":{"Email":"jrocket@example.com","GivenName":"Johnny","Role":["Manager","Project Administrator"],"Surname":"Rocket","aud":"www.example.com","exp":1721851862,"iat":1690315862,"iss":"Online JWT Builder","sub":"jrocket@example.com"}}`},
@ -77,6 +85,8 @@ func Test_populateLogEvent(t *testing.T) {
{log.AuthorizeLogFieldQuery, s, `{"query":"a=b"}`},
{log.AuthorizeLogFieldRemovedGroupsCount, s, `{"removed-groups-count":42}`},
{log.AuthorizeLogFieldRequestID, s, `{"request-id":"REQUEST-ID"}`},
{log.AuthorizeLogFieldRouteChecksum, s, `{"route-checksum":14294378844626301341}`},
{log.AuthorizeLogFieldRouteID, s, `{"route-id":"POLICY-ID"}`},
{log.AuthorizeLogFieldServiceAccountID, sa, `{"service-account-id":"SERVICE-ACCOUNT-ID"}`},
{log.AuthorizeLogFieldSessionID, s, `{"session-id":"SESSION-ID"}`},
{log.AuthorizeLogFieldUser, s, `{"user":"USER-ID"}`},