github: use GraphQL API to reduce number of API calls for directory sync (#2715)

* github: use GraphQL API to reduce number of API calls for directory sync

* fix id encoding

* github: use slug instead of id, update upgrading.md

* Update docs/docs/upgrading.md

Co-authored-by: Alex Fornuto <afornuto@pomerium.com>

Co-authored-by: Alex Fornuto <afornuto@pomerium.com>
This commit is contained in:
Caleb Doxsey 2021-10-27 11:50:48 -06:00 committed by GitHub
parent d390e80b30
commit 99b905a336
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 499 additions and 204 deletions

View file

@ -10,8 +10,6 @@ import (
"net/http"
"net/url"
"sort"
"strconv"
"strings"
"github.com/rs/zerolog"
"github.com/tomnomnom/linkheader"
@ -141,45 +139,48 @@ func (p *Provider) UserGroups(ctx context.Context) ([]*directory.Group, []*direc
var allGroups []*directory.Group
for _, orgSlug := range orgSlugs {
groups, err := p.listGroups(ctx, orgSlug)
teams, err := p.listOrganizationTeamsWithMemberIDs(ctx, orgSlug)
if err != nil {
return nil, nil, err
}
for _, group := range groups {
userLogins, err := p.listTeamMembers(ctx, orgSlug, group.Name)
if err != nil {
return nil, nil, err
}
for _, userLogin := range userLogins {
userLoginToGroups[userLogin] = append(userLoginToGroups[userLogin], group.Id)
for _, team := range teams {
allGroups = append(allGroups, &directory.Group{
Id: team.Slug,
Name: team.Slug,
})
for _, memberID := range team.MemberIDs {
userLoginToGroups[memberID] = append(userLoginToGroups[memberID], team.Slug)
}
}
allGroups = append(allGroups, groups...)
}
var users []*directory.User
for userLogin, groups := range userLoginToGroups {
u, err := p.getUser(ctx, userLogin)
if err != nil {
return nil, nil, err
}
user := &directory.User{
Id: userLogin,
GroupIds: groups,
DisplayName: u.Name,
Email: u.Email,
}
sort.Strings(user.GroupIds)
users = append(users, user)
}
sort.Slice(users, func(i, j int) bool {
return users[i].GetId() < users[j].GetId()
sort.Slice(allGroups, func(i, j int) bool {
return allGroups[i].Id < allGroups[j].Id
})
return allGroups, users, nil
var allUsers []*directory.User
for _, orgSlug := range orgSlugs {
members, err := p.listOrganizationMembers(ctx, orgSlug)
if err != nil {
return nil, nil, err
}
for _, member := range members {
du := &directory.User{
Id: member.Login,
GroupIds: userLoginToGroups[member.ID],
DisplayName: member.Name,
Email: member.Email,
}
sort.Strings(du.GroupIds)
allUsers = append(allUsers, du)
}
}
sort.Slice(allUsers, func(i, j int) bool {
return allUsers[i].Id < allUsers[j].Id
})
return allGroups, allUsers, nil
}
func (p *Provider) listOrgs(ctx context.Context) (orgSlugs []string, err error) {
@ -206,59 +207,6 @@ func (p *Provider) listOrgs(ctx context.Context) (orgSlugs []string, err error)
return orgSlugs, nil
}
// listGroups returns one directory.Group per team of the organization orgSlug.
//
// Teams are read from the REST endpoint /orgs/{org}/teams. After each request the
// response headers are handed to getNextLink, and the loop keeps going until no
// further page is advertised. The numeric team id becomes the group Id and the
// team slug becomes the group Name.
func (p *Provider) listGroups(ctx context.Context, orgSlug string) ([]*directory.Group, error) {
	// teamObject is the subset of the REST team payload that is needed here.
	type teamObject struct {
		ID   int    `json:"id"`
		Slug string `json:"slug"`
	}

	var groups []*directory.Group
	pageURL := p.cfg.url.ResolveReference(&url.URL{
		Path: fmt.Sprintf("/orgs/%s/teams", orgSlug),
	}).String()
	for pageURL != "" {
		// A fresh slice per page, so nothing from the previous page leaks into this one.
		var page []teamObject
		hdrs, err := p.api(ctx, pageURL, &page)
		if err != nil {
			return nil, err
		}
		for i := range page {
			groups = append(groups, &directory.Group{
				Id:   strconv.Itoa(page[i].ID),
				Name: page[i].Slug,
			})
		}
		pageURL = getNextLink(hdrs)
	}
	return groups, nil
}
// listTeamMembers returns the login of every member of the team teamSlug in the
// organization orgSlug.
//
// Members are read from the REST endpoint /orgs/{org}/teams/{team}/members, following
// the next-page link extracted by getNextLink until there is none left.
func (p *Provider) listTeamMembers(ctx context.Context, orgSlug, teamSlug string) (userLogins []string, err error) {
	// memberObject is the subset of the REST member payload that is needed here.
	type memberObject struct {
		Login string `json:"login"`
	}

	pageURL := p.cfg.url.ResolveReference(&url.URL{
		Path: fmt.Sprintf("/orgs/%s/teams/%s/members", orgSlug, teamSlug),
	}).String()
	for pageURL != "" {
		var page []memberObject
		hdrs, apiErr := p.api(ctx, pageURL, &page)
		if apiErr != nil {
			return nil, apiErr
		}
		for i := range page {
			userLogins = append(userLogins, page[i].Login)
		}
		pageURL = getNextLink(hdrs)
	}
	// Every failure returned early above, so reaching this point means success.
	return userLogins, nil
}
func (p *Provider) getUser(ctx context.Context, userLogin string) (*apiUserObject, error) {
apiURL := p.cfg.url.ResolveReference(&url.URL{
Path: fmt.Sprintf("/users/%s", userLogin),
@ -273,81 +221,6 @@ func (p *Provider) getUser(ctx context.Context, userLogin string) (*apiUserObjec
return &res, nil
}
// listUserOrganizationTeams returns the IDs of the teams in the organization orgSlug
// that the user userSlug belongs to.
//
// GitHub's REST API doesn't have an easy way of querying this data, so the GraphQL
// API is used instead. Teams are requested pageCount at a time and every node ID is
// run through decodeTeamID before it is returned. Paging stops when a page comes
// back empty or once as many IDs as the reported totalCount have been collected.
//
// An error is returned if a request fails or a node ID cannot be decoded.
func (p *Provider) listUserOrganizationTeams(ctx context.Context, userSlug string, orgSlug string) ([]string, error) {
	// enc renders a value as a JSON literal so it can be embedded in the query text.
	// The Marshal error is ignored: only strings and an int constant are encoded here.
	enc := func(obj interface{}) string {
		bs, _ := json.Marshal(obj)
		return string(bs)
	}
	const pageCount = 100

	var teamIDs []string
	var cursor *string
	for {
		var res struct {
			Data struct {
				Organization struct {
					Teams struct {
						TotalCount int `json:"totalCount"`
						PageInfo   struct {
							EndCursor string `json:"endCursor"`
						} `json:"pageInfo"`
						Edges []struct {
							Node struct {
								ID string `json:"id"`
							} `json:"node"`
						} `json:"edges"`
					} `json:"teams"`
				} `json:"organization"`
			} `json:"data"`
		}

		// Only pages after the first carry a cursor. It has to be sent as the named
		// "after" argument; a bare value in the argument list is not valid GraphQL.
		afterArg := ""
		if cursor != nil {
			afterArg = fmt.Sprintf(", after:%s", enc(*cursor))
		}
		q := fmt.Sprintf(`query {
			organization(login:%s) {
				teams(first:%s, userLogins:[%s]%s) {
					totalCount
					pageInfo {
						endCursor
					}
					edges {
						node {
							id
						}
					}
				}
			}
		}`, enc(orgSlug), enc(pageCount), enc(userSlug), afterArg)
		_, err := p.graphql(ctx, q, &res)
		if err != nil {
			return nil, err
		}

		teams := &res.Data.Organization.Teams
		if len(teams.Edges) == 0 {
			break
		}

		for _, edge := range teams.Edges {
			teamID, err := decodeTeamID(edge.Node.ID)
			if err != nil {
				return nil, err
			}
			teamIDs = append(teamIDs, teamID)
		}

		if len(teamIDs) >= teams.TotalCount {
			break
		}

		// res is declared per iteration, so this pointer stays valid for the next request.
		cursor = &teams.PageInfo.EndCursor
	}

	return teamIDs, nil
}
func (p *Provider) api(ctx context.Context, apiURL string, out interface{}) (http.Header, error) {
req, err := http.NewRequestWithContext(ctx, "GET", apiURL, nil)
if err != nil {
@ -410,21 +283,6 @@ func (p *Provider) graphql(ctx context.Context, query string, out interface{}) (
return res.Header, nil
}
// decodeTeamID extracts the team ID from a GraphQL node ID.
//
// The GitHub GraphQL API returns IDs as base64 encoded strings
// (see https://developer.github.com/v4/scalar/id/). Once decoded, a team ID is
// formed like "04:Team12345": everything after the first colon is kept and a
// leading "Team" prefix, if present, is stripped.
//
// An error is returned when src is not valid standard base64 or when the decoded
// text contains no colon.
func decodeTeamID(src string) (string, error) {
	raw, err := base64.StdEncoding.DecodeString(src)
	if err != nil {
		return "", fmt.Errorf("github: failed to decode base64 team id: %w", err)
	}

	decoded := string(raw)
	colon := strings.Index(decoded, ":")
	if colon < 0 {
		return "", fmt.Errorf("github: invalid team id: %s", decoded)
	}
	return strings.TrimPrefix(decoded[colon+1:], "Team"), nil
}
func getNextLink(hdrs http.Header) string {
for _, link := range linkheader.ParseMultiple(hdrs.Values("Link")) {
if link.Rel == "next" {
@ -469,3 +327,10 @@ type apiUserObject struct {
Name string `json:"name"`
Email string `json:"email"`
}
// teamWithMemberIDs is a GitHub team together with the IDs of its members, as
// assembled by listOrganizationTeamsWithMemberIDs from the GraphQL API.
type teamWithMemberIDs struct {
// ID is the team's GraphQL node `id`.
ID string
// Slug is the team's `slug`; UserGroups uses it as both the directory group id and name.
Slug string
// Name is the team's `name`.
Name string
// MemberIDs holds the GraphQL node `id` of each team member (not their login).
MemberIDs []string
}

View file

@ -11,6 +11,8 @@ import (
"github.com/go-chi/chi"
"github.com/go-chi/chi/middleware"
"github.com/stretchr/testify/assert"
"github.com/vektah/gqlparser/ast"
"github.com/vektah/gqlparser/parser"
"github.com/pomerium/pomerium/internal/testutil"
)
@ -35,26 +37,198 @@ func newMockAPI(t *testing.T, srv *httptest.Server) http.Handler {
}
json.NewDecoder(r.Body).Decode(&body)
_ = json.NewEncoder(w).Encode(M{
"data": M{
"organization": M{
"teams": M{
"totalCount": 3,
"edges": []M{
{"node": M{
"id": "MDQ6VGVhbTE=",
}},
{"node": M{
"id": "MDQ6VGVhbTI=",
}},
{"node": M{
"id": "MDQ6VGVhbTM=",
}},
},
},
},
},
q, err := parser.ParseQuery(&ast.Source{
Input: body.Query,
})
if err != nil {
panic(err)
}
result := qlResult{
Data: &qlData{
Organization: &qlOrganization{},
},
}
handleMembersWithRole := func(orgSlug string, field *ast.Field) {
membersWithRole := &qlMembersWithRoleConnection{}
var cursor string
for _, arg := range field.Arguments {
if arg.Name == "after" {
cursor = arg.Value.Raw
}
}
switch cursor {
case `null`:
switch orgSlug {
case "org1":
membersWithRole.PageInfo = qlPageInfo{EndCursor: "TOKEN1", HasNextPage: true}
membersWithRole.Nodes = []qlUser{
{ID: "user1", Login: "user1", Name: "User 1", Email: "user1@example.com"},
{ID: "user2", Login: "user2", Name: "User 2", Email: "user2@example.com"},
}
case "org2":
membersWithRole.PageInfo = qlPageInfo{HasNextPage: false}
membersWithRole.Nodes = []qlUser{
{ID: "user4", Login: "user4", Name: "User 4", Email: "user4@example.com"},
}
default:
t.Errorf("unexpected org slug: %s", orgSlug)
}
case `TOKEN1`:
membersWithRole.PageInfo = qlPageInfo{HasNextPage: false}
membersWithRole.Nodes = []qlUser{
{ID: "user3", Login: "user3", Name: "User 3", Email: "user3@example.com"},
}
default:
t.Errorf("unexpected cursor: %s", cursor)
}
result.Data.Organization.MembersWithRole = membersWithRole
}
handleTeamMembers := func(orgSlug, teamSlug string, field *ast.Field) {
result.Data.Organization.Team.Members = &qlTeamMemberConnection{
PageInfo: qlPageInfo{HasNextPage: false},
}
switch teamSlug {
case "team3":
result.Data.Organization.Team.Members.Edges = []qlTeamMemberEdge{
{Node: qlUser{ID: "user3"}},
}
}
}
handleTeam := func(orgSlug string, field *ast.Field) {
result.Data.Organization.Team = &qlTeam{}
var teamSlug string
for _, arg := range field.Arguments {
if arg.Name == "slug" {
teamSlug = arg.Value.Raw
}
}
for _, selection := range field.SelectionSet {
subField, ok := selection.(*ast.Field)
if !ok {
continue
}
switch subField.Name {
case "members":
handleTeamMembers(orgSlug, teamSlug, subField)
}
}
}
handleTeams := func(orgSlug string, field *ast.Field) {
teams := &qlTeamConnection{}
var cursor string
var userLogin string
for _, arg := range field.Arguments {
if arg.Name == "after" {
cursor = arg.Value.Raw
}
if arg.Name == "userLogins" {
userLogin = arg.Value.Children[0].Value.Raw
}
}
switch cursor {
case `null`:
switch orgSlug {
case "org1":
teams.PageInfo = qlPageInfo{HasNextPage: true, EndCursor: "TOKEN1"}
teams.Edges = []qlTeamEdge{
{Node: qlTeam{ID: "MDQ6VGVhbTE=", Slug: "team1", Name: "Team 1", Members: &qlTeamMemberConnection{
PageInfo: qlPageInfo{HasNextPage: false},
Edges: []qlTeamMemberEdge{
{Node: qlUser{ID: "user1"}},
{Node: qlUser{ID: "user2"}},
},
}}},
}
case "org2":
teams.PageInfo = qlPageInfo{HasNextPage: false}
teams.Edges = []qlTeamEdge{
{Node: qlTeam{ID: "MDQ6VGVhbTM=", Slug: "team3", Name: "Team 3", Members: &qlTeamMemberConnection{
PageInfo: qlPageInfo{HasNextPage: true, EndCursor: "TOKEN1"},
Edges: []qlTeamMemberEdge{
{Node: qlUser{ID: "user1"}},
{Node: qlUser{ID: "user2"}},
},
}}},
}
if userLogin == "" || userLogin == "user4" {
teams.Edges = append(teams.Edges, qlTeamEdge{
Node: qlTeam{ID: "MDQ6VGVhbTQ=", Slug: "team4", Name: "Team 4", Members: &qlTeamMemberConnection{
PageInfo: qlPageInfo{HasNextPage: false},
Edges: []qlTeamMemberEdge{
{Node: qlUser{ID: "user4"}},
},
}},
})
}
default:
t.Errorf("unexpected org slug: %s", orgSlug)
}
case "TOKEN1":
teams.PageInfo = qlPageInfo{HasNextPage: false}
teams.Edges = []qlTeamEdge{
{Node: qlTeam{ID: "MDQ6VGVhbTI=", Slug: "team2", Name: "Team 2", Members: &qlTeamMemberConnection{
PageInfo: qlPageInfo{HasNextPage: false},
Edges: []qlTeamMemberEdge{
{Node: qlUser{ID: "user1"}},
},
}}},
}
default:
t.Errorf("unexpected cursor: %s", cursor)
}
result.Data.Organization.Teams = teams
}
handleOrganization := func(field *ast.Field) {
var orgSlug string
for _, arg := range field.Arguments {
if arg.Name == "login" {
orgSlug = arg.Value.Raw
}
}
for _, orgSelection := range field.SelectionSet {
orgField, ok := orgSelection.(*ast.Field)
if !ok {
continue
}
switch orgField.Name {
case "teams":
handleTeams(orgSlug, orgField)
case "team":
handleTeam(orgSlug, orgField)
case "membersWithRole":
handleMembersWithRole(orgSlug, orgField)
}
}
}
for _, operation := range q.Operations {
for _, selection := range operation.SelectionSet {
field, ok := selection.(*ast.Field)
if !ok {
continue
}
if field.Name != "organization" {
continue
}
handleOrganization(field)
}
}
_ = json.NewEncoder(w).Encode(result)
})
r.Get("/user/orgs", func(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode([]M{
@ -136,7 +310,7 @@ func TestProvider_User(t *testing.T) {
}
testutil.AssertProtoJSONEqual(t, `{
"id": "user1",
"groupIds": ["1", "2", "3"],
"groupIds": ["team1", "team2", "team3"],
"displayName": "User 1",
"email": "user1@example.com"
}`, du)
@ -160,16 +334,16 @@ func TestProvider_UserGroups(t *testing.T) {
groups, users, err := p.UserGroups(context.Background())
assert.NoError(t, err)
testutil.AssertProtoJSONEqual(t, `[
{ "id": "user1", "groupIds": ["1", "2", "3"], "displayName": "User 1", "email": "user1@example.com" },
{ "id": "user2", "groupIds": ["1", "3"], "displayName": "User 2", "email": "user2@example.com" },
{ "id": "user3", "groupIds": ["3"], "displayName": "User 3", "email": "user3@example.com" },
{ "id": "user4", "groupIds": ["4"], "displayName": "User 4", "email": "user4@example.com" }
{ "id": "user1", "groupIds": ["team1", "team2", "team3"], "displayName": "User 1", "email": "user1@example.com" },
{ "id": "user2", "groupIds": ["team1", "team3"], "displayName": "User 2", "email": "user2@example.com" },
{ "id": "user3", "groupIds": ["team3"], "displayName": "User 3", "email": "user3@example.com" },
{ "id": "user4", "groupIds": ["team4"], "displayName": "User 4", "email": "user4@example.com" }
]`, users)
testutil.AssertProtoJSONEqual(t, `[
{ "id": "1", "name": "team1" },
{ "id": "2", "name": "team2" },
{ "id": "3", "name": "team3" },
{ "id": "4", "name": "team4" }
{ "id": "team1", "name": "team1" },
{ "id": "team2", "name": "team2" },
{ "id": "team3", "name": "team3" },
{ "id": "team4", "name": "team4" }
]`, groups)
}

View file

@ -0,0 +1,245 @@
package github
import (
"context"
"encoding/json"
"fmt"
)
// maxPageCount is the page size passed as the `first:` argument of every paginated
// GraphQL connection queried in this file.
// NOTE(review): 100 appears to be the largest page size GitHub's GraphQL API accepts;
// confirm before raising it.
const maxPageCount = 100
// The ql* types mirror the JSON shape of the GraphQL responses decoded in this file.
// Nested objects that a given query may not select are pointers, so they are nil
// when absent from (or null in) the response.
type (
// qlData is the top-level `data` object of a response.
qlData struct {
Organization *qlOrganization `json:"organization"`
}
// qlMembersWithRoleConnection is one page of an organization's `membersWithRole`
// connection; users are read from `nodes`.
qlMembersWithRoleConnection struct {
Nodes []qlUser `json:"nodes"`
PageInfo qlPageInfo `json:"pageInfo"`
}
// qlOrganization holds whichever of the organization's sub-selections a query
// asked for: `membersWithRole`, a single `team`, or the `teams` connection.
qlOrganization struct {
MembersWithRole *qlMembersWithRoleConnection `json:"membersWithRole"`
Team *qlTeam `json:"team"`
Teams *qlTeamConnection `json:"teams"`
}
// qlPageInfo is the paging state of a connection: HasNextPage tells the caller
// whether to keep going and EndCursor is sent as `after:` to get the next page.
qlPageInfo struct {
EndCursor string `json:"endCursor"`
HasNextPage bool `json:"hasNextPage"`
}
// qlResult is the complete response envelope; only `data` is decoded.
qlResult struct {
Data *qlData `json:"data"`
}
// qlTeam is a team node. Only the fields a query selects are populated; Members
// is nil unless the query selected `members`.
qlTeam struct {
ID string `json:"id"`
Name string `json:"name"`
Slug string `json:"slug"`
Members *qlTeamMemberConnection `json:"members"`
}
// qlTeamConnection is one page of an organization's `teams` connection.
qlTeamConnection struct {
Edges []qlTeamEdge `json:"edges"`
PageInfo qlPageInfo `json:"pageInfo"`
}
// qlTeamEdge wraps a single team node inside a qlTeamConnection.
qlTeamEdge struct {
Node qlTeam `json:"node"`
}
// qlTeamMemberConnection is one page of a team's `members` connection.
qlTeamMemberConnection struct {
Edges []qlTeamMemberEdge `json:"edges"`
PageInfo qlPageInfo `json:"pageInfo"`
}
// qlTeamMemberEdge wraps a single user node inside a qlTeamMemberConnection.
qlTeamMemberEdge struct {
Node qlUser `json:"node"`
}
// qlUser is a user node. Only the fields a query selects are populated; the
// team member queries in this file select `id` alone.
qlUser struct {
ID string `json:"id"`
Login string `json:"login"`
Name string `json:"name"`
Email string `json:"email"`
}
)
// listOrganizationMembers returns every member of the organization orgSlug, read
// from the `membersWithRole` connection of the GraphQL API.
//
// Members are requested maxPageCount at a time. Each page's endCursor is sent back
// as the `after:` argument of the next request until hasNextPage is false.
//
// An error is returned when a request fails, or when a response carries no
// organization / membersWithRole object. All of those are pointers in qlResult, so
// dereferencing them unchecked would panic on a null in the response.
func (p *Provider) listOrganizationMembers(ctx context.Context, orgSlug string) ([]qlUser, error) {
	var results []qlUser
	// A nil cursor is encoded as `null`, which asks for the first page.
	var cursor *string
	for {
		var res qlResult
		q := fmt.Sprintf(`query {
			organization(login:%s) {
				membersWithRole(first:%d, after:%s) {
					pageInfo {
						endCursor
						hasNextPage
					}
					nodes {
						id
						login
						name
						email
					}
				}
			}
		}`, encode(orgSlug), maxPageCount, encode(cursor))
		_, err := p.graphql(ctx, q, &res)
		if err != nil {
			return nil, err
		}
		if res.Data == nil || res.Data.Organization == nil || res.Data.Organization.MembersWithRole == nil {
			return nil, fmt.Errorf("github: no members returned for organization %q", orgSlug)
		}

		members := res.Data.Organization.MembersWithRole
		results = append(results, members.Nodes...)

		if !members.PageInfo.HasNextPage {
			break
		}
		// res is declared per iteration, so this pointer stays valid for the next request.
		cursor = &members.PageInfo.EndCursor
	}
	return results, nil
}
// listOrganizationTeamsWithMemberIDs returns every team of the organization orgSlug
// together with the GraphQL node IDs of each team's members.
//
// It works in two phases:
//
//  1. Page through `organization.teams`, fetching up to maxPageCount members of
//     each team inline in the same request.
//  2. For every team whose inline members connection reported another page, page
//     through `organization.team(slug).members`, starting after the cursor that
//     the inline connection ended on.
//
// An error is returned when a request fails, or when a response lacks the
// organization / teams / team / members object that was asked for. Those are
// pointers in qlResult, so dereferencing them unchecked would panic on a null in
// the response.
func (p *Provider) listOrganizationTeamsWithMemberIDs(ctx context.Context, orgSlug string) ([]teamWithMemberIDs, error) {
	var results []teamWithMemberIDs
	// pageInfos[i] is the paging state of the inline members connection of results[i].
	var pageInfos []qlPageInfo

	// first query all the teams with their members
	var cursor *string // nil is encoded as `null`, which asks for the first page
	for {
		var res qlResult
		q := fmt.Sprintf(`query {
			organization(login:%s) {
				teams(first:%d, after:%s) {
					pageInfo {
						endCursor
						hasNextPage
					}
					edges {
						node {
							id
							name
							slug
							members(first:%d) {
								pageInfo {
									endCursor
									hasNextPage
								}
								edges {
									node {
										id
									}
								}
							}
						}
					}
				}
			}
		}`, encode(orgSlug), maxPageCount, encode(cursor), maxPageCount)
		_, err := p.graphql(ctx, q, &res)
		if err != nil {
			return nil, err
		}
		if res.Data == nil || res.Data.Organization == nil || res.Data.Organization.Teams == nil {
			return nil, fmt.Errorf("github: no teams returned for organization %q", orgSlug)
		}

		teams := res.Data.Organization.Teams
		for _, teamEdge := range teams.Edges {
			team := teamWithMemberIDs{
				ID:   teamEdge.Node.ID,
				Slug: teamEdge.Node.Slug,
				Name: teamEdge.Node.Name,
			}
			// A team without a members object is recorded with no members and a zero
			// pageInfo, so the second phase skips it.
			var pageInfo qlPageInfo
			if members := teamEdge.Node.Members; members != nil {
				for _, memberEdge := range members.Edges {
					team.MemberIDs = append(team.MemberIDs, memberEdge.Node.ID)
				}
				pageInfo = members.PageInfo
			}
			results = append(results, team)
			pageInfos = append(pageInfos, pageInfo)
		}

		if !teams.PageInfo.HasNextPage {
			break
		}
		cursor = &teams.PageInfo.EndCursor
	}

	// it's possible we didn't get all the members in the initial query, so go through each team and
	// check the member pageInfo. If there are still remaining members, query those.
	for i := range pageInfos {
		if !pageInfos[i].HasNextPage {
			continue
		}

		// Point at the slice element rather than at a range variable.
		cursor = &pageInfos[i].EndCursor
		for {
			var res qlResult
			q := fmt.Sprintf(`query {
				organization(login:%s) {
					team(slug:%s) {
						members(first:%d, after:%s) {
							pageInfo {
								endCursor
								hasNextPage
							}
							edges {
								node {
									id
								}
							}
						}
					}
				}
			}`, encode(orgSlug), encode(results[i].Slug), maxPageCount, encode(cursor))
			_, err := p.graphql(ctx, q, &res)
			if err != nil {
				return nil, err
			}
			if res.Data == nil || res.Data.Organization == nil ||
				res.Data.Organization.Team == nil || res.Data.Organization.Team.Members == nil {
				return nil, fmt.Errorf("github: no members returned for team %q in organization %q",
					results[i].Slug, orgSlug)
			}

			members := res.Data.Organization.Team.Members
			for _, memberEdge := range members.Edges {
				results[i].MemberIDs = append(results[i].MemberIDs, memberEdge.Node.ID)
			}

			if !members.PageInfo.HasNextPage {
				break
			}
			cursor = &members.PageInfo.EndCursor
		}
	}

	return results, nil
}
// listUserOrganizationTeams returns the slugs of the teams in the organization
// orgSlug that the user userSlug belongs to.
//
// GitHub's Rest API doesn't have an easy way of querying this data, so we use the
// GraphQL API. Teams are requested maxPageCount at a time, filtered by
// `userLogins`, and each page's endCursor is sent back as `after:` until
// hasNextPage is false.
//
// An error is returned when a request fails, or when a response carries no
// organization / teams object. Those are pointers in qlResult, so dereferencing
// them unchecked would panic on a null in the response.
func (p *Provider) listUserOrganizationTeams(ctx context.Context, userSlug string, orgSlug string) ([]string, error) {
	var teamSlugs []string
	// A nil cursor is encoded as `null`, which asks for the first page.
	var cursor *string
	for {
		var res qlResult
		// `slug` must be selected explicitly: GraphQL only returns the fields that were
		// asked for, and the slug is what this function hands back to its caller.
		q := fmt.Sprintf(`query {
			organization(login:%s) {
				teams(first:%d, userLogins:[%s], after:%s) {
					pageInfo {
						endCursor
						hasNextPage
					}
					edges {
						node {
							id
							slug
						}
					}
				}
			}
		}`, encode(orgSlug), maxPageCount, encode(userSlug), encode(cursor))
		_, err := p.graphql(ctx, q, &res)
		if err != nil {
			return nil, err
		}
		if res.Data == nil || res.Data.Organization == nil || res.Data.Organization.Teams == nil {
			return nil, fmt.Errorf("github: no teams returned for user %q in organization %q", userSlug, orgSlug)
		}

		teams := res.Data.Organization.Teams
		for _, edge := range teams.Edges {
			teamSlugs = append(teamSlugs, edge.Node.Slug)
		}

		if !teams.PageInfo.HasNextPage {
			break
		}
		// res is declared per iteration, so this pointer stays valid for the next request.
		cursor = &teams.PageInfo.EndCursor
	}

	return teamSlugs, nil
}
// encode renders obj as a JSON literal so it can be spliced into the text of a
// GraphQL query: strings come out quoted and escaped, numbers come out bare, and a
// nil pointer comes out as `null`.
//
// A value that cannot be marshaled yields the empty string.
func encode(obj interface{}) string {
	var literal string
	// A marshal failure is deliberately not reported; literal just stays empty.
	if encoded, err := json.Marshal(obj); err == nil {
		literal = string(encoded)
	}
	return literal
}