databroker: refactor databroker to sync all changes (#1879)

* refactor backend, implement encrypted store

* refactor in-memory store

* wip

* wip

* wip

* add syncer test

* fix redis expiry

* fix linting issues

* fix test by skipping non-config records

* fix backoff import

* fix init issues

* fix query

* wait for initial sync before starting directory sync

* add type to SyncLatest

* add more log messages, fix deadlock in in-memory store, always return server version from SyncLatest

* update sync types and tests

* add redis tests

* skip macos in github actions

* add comments to proto

* split getBackend into separate methods

* handle errors in initVersion

* return different error for not found vs other errors in get

* use exponential backoff for redis transaction retry

* rename raw to result

* use context instead of close channel

* store type urls as constants in databroker

* use timestampb instead of ptypes

* fix group merging not waiting

* change locked names

* update GetAll to return latest record version

* add method to grpcutil to get the type url for a protobuf type
Caleb Doxsey 2021-02-18 15:24:33 -07:00 committed by GitHub
parent b1871b0f2e
commit 5d60cff21e
66 changed files with 2762 additions and 2871 deletions


@@ -5,29 +5,30 @@ import (
"context"
"errors"
"fmt"
"strconv"
"sync"
"time"
"github.com/cenkalti/backoff/v4"
redis "github.com/go-redis/redis/v8"
"github.com/golang/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/signal"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/internal/telemetry/trace"
"github.com/pomerium/pomerium/pkg/grpc/databroker"
"github.com/pomerium/pomerium/pkg/storage"
)
// Name of the storage backend.
const Name = config.StorageRedisName
const (
maxTransactionRetries = 100
watchPollInterval = 30 * time.Second
lastVersionKey = "pomerium.last_version"
lastVersionChKey = "pomerium.last_version_ch"
recordHashKey = "pomerium.records"
changesSetKey = "pomerium.changes"
)
// custom errors
@@ -35,21 +36,24 @@ var (
ErrExceededMaxRetries = errors.New("redis: transaction reached maximum number of retries")
)
// DB implements the storage.Backend on top of redis.
type DB struct {
cfg *dbConfig
// Backend implements the storage.Backend on top of redis.
type Backend struct {
cfg *config
client *redis.Client
client *redis.Client
onChange *signal.Signal
closeOnce sync.Once
closed chan struct{}
}
// New creates a new redis storage backend.
func New(rawURL string, options ...Option) (*DB, error) {
db := &DB{
cfg: getConfig(options...),
closed: make(chan struct{}),
func New(rawURL string, options ...Option) (*Backend, error) {
cfg := getConfig(options...)
backend := &Backend{
cfg: cfg,
closed: make(chan struct{}),
onChange: signal.New(),
}
opts, err := redis.ParseURL(rawURL)
if err != nil {
@@ -57,194 +61,150 @@ func New(rawURL string, options ...Option) (*DB, error) {
}
// when using TLS, the TLS config will not be set to nil, in which case we replace it with our own
if opts.TLSConfig != nil {
opts.TLSConfig = db.cfg.tls
opts.TLSConfig = backend.cfg.tls
}
db.client = redis.NewClient(opts)
metrics.AddRedisMetrics(db.client.PoolStats)
return db, nil
}
backend.client = redis.NewClient(opts)
metrics.AddRedisMetrics(backend.client.PoolStats)
go backend.listenForVersionChanges()
if cfg.expiry != 0 {
go func() {
ticker := time.NewTicker(time.Minute)
defer ticker.Stop()
for {
select {
case <-backend.closed:
return
case <-ticker.C:
}
// ClearDeleted clears all the deleted records older than the cutoff time.
func (db *DB) ClearDeleted(ctx context.Context, cutoff time.Time) {
var err error
_, span := trace.StartSpan(ctx, "databroker.redis.ClearDeleted")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "clear_deleted", err) }(time.Now())
ids, _ := db.client.SMembers(ctx, formatDeletedSetKey(db.cfg.recordType)).Result()
records, _ := redisGetRecords(ctx, db.client, db.cfg.recordType, ids)
_, err = db.client.Pipelined(ctx, func(p redis.Pipeliner) error {
for _, record := range records {
if record.GetDeletedAt().AsTime().Before(cutoff) {
p.HDel(ctx, formatRecordsKey(db.cfg.recordType), record.GetId())
p.ZRem(ctx, formatVersionSetKey(db.cfg.recordType), record.GetId())
p.SRem(ctx, formatDeletedSetKey(db.cfg.recordType), record.GetId())
backend.removeChangesBefore(time.Now().Add(-cfg.expiry))
}
}
return nil
})
}()
}
return backend, nil
}
// Close closes the underlying redis connection and any watchers.
func (db *DB) Close() error {
func (backend *Backend) Close() error {
var err error
db.closeOnce.Do(func() {
err = db.client.Close()
close(db.closed)
backend.closeOnce.Do(func() {
err = backend.client.Close()
close(backend.closed)
})
return err
}
// Delete marks a record as deleted.
func (db *DB) Delete(ctx context.Context, id string) (err error) {
_, span := trace.StartSpan(ctx, "databroker.redis.Delete")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "delete", err) }(time.Now())
var record *databroker.Record
err = db.incrementVersion(ctx,
func(tx *redis.Tx, version int64) error {
var err error
record, err = redisGetRecord(ctx, tx, db.cfg.recordType, id)
if errors.Is(err, redis.Nil) {
// nothing to do, as the record doesn't exist
return nil
} else if err != nil {
return err
}
// mark it as deleted
record.DeletedAt = timestamppb.Now()
record.Version = formatVersion(version)
return nil
},
func(p redis.Pipeliner, version int64) error {
err := redisSetRecord(ctx, p, db.cfg.recordType, record)
if err != nil {
return err
}
// add it to the collection of deleted entries
p.SAdd(ctx, formatDeletedSetKey(db.cfg.recordType), record.GetId())
return nil
})
return err
}
// Get gets a record.
func (db *DB) Get(ctx context.Context, id string) (record *databroker.Record, err error) {
// Get gets a record from redis.
func (backend *Backend) Get(ctx context.Context, recordType, id string) (_ *databroker.Record, err error) {
_, span := trace.StartSpan(ctx, "databroker.redis.Get")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "get", err) }(time.Now())
record, err = redisGetRecord(ctx, db.client, db.cfg.recordType, id)
return record, err
}
key, field := getHashKey(recordType, id)
cmd := backend.client.HGet(ctx, key, field)
raw, err := cmd.Result()
if err == redis.Nil {
return nil, storage.ErrNotFound
} else if err != nil {
return nil, err
}
// List lists all the records changed since the sinceVersion. Records are sorted in version order.
func (db *DB) List(ctx context.Context, sinceVersion string) (records []*databroker.Record, err error) {
_, span := trace.StartSpan(ctx, "databroker.redis.List")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "list", err) }(time.Now())
var ids []string
ids, err = redisListIDsSince(ctx, db.client, db.cfg.recordType, sinceVersion)
var record databroker.Record
err = proto.Unmarshal([]byte(raw), &record)
if err != nil {
return nil, err
}
records, err = redisGetRecords(ctx, db.client, db.cfg.recordType, ids)
return records, err
return &record, nil
}
// Put updates a record.
func (db *DB) Put(ctx context.Context, id string, data *anypb.Any) (err error) {
// GetAll gets all the records from redis.
func (backend *Backend) GetAll(ctx context.Context) (records []*databroker.Record, latestRecordVersion uint64, err error) {
_, span := trace.StartSpan(ctx, "databroker.redis.GetAll")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "getall", err) }(time.Now())
p := backend.client.Pipeline()
lastVersionCmd := p.Get(ctx, lastVersionKey)
resultsCmd := p.HVals(ctx, recordHashKey)
_, err = p.Exec(ctx)
if err != nil {
return nil, 0, err
}
latestRecordVersion, err = lastVersionCmd.Uint64()
if errors.Is(err, redis.Nil) {
latestRecordVersion = 0
} else if err != nil {
return nil, 0, err
}
results, err := resultsCmd.Result()
if err != nil {
return nil, 0, err
}
for _, result := range results {
var record databroker.Record
err := proto.Unmarshal([]byte(result), &record)
if err != nil {
log.Warn().Err(err).Msg("redis: invalid record detected")
continue
}
records = append(records, &record)
}
return records, latestRecordVersion, nil
}
// Put puts a record into redis.
func (backend *Backend) Put(ctx context.Context, record *databroker.Record) (err error) {
_, span := trace.StartSpan(ctx, "databroker.redis.Put")
defer span.End()
defer func(start time.Time) { recordOperation(ctx, start, "put", err) }(time.Now())
var record *databroker.Record
err = db.incrementVersion(ctx,
func(tx *redis.Tx, version int64) error {
var err error
record, err = redisGetRecord(ctx, db.client, db.cfg.recordType, id)
if errors.Is(err, redis.Nil) {
record = new(databroker.Record)
record.CreatedAt = timestamppb.Now()
} else if err != nil {
return backend.incrementVersion(ctx,
func(tx *redis.Tx, version uint64) error {
record.ModifiedAt = timestamppb.Now()
record.Version = version
return nil
},
func(p redis.Pipeliner, version uint64) error {
bs, err := proto.Marshal(record)
if err != nil {
return err
}
record.ModifiedAt = timestamppb.Now()
record.Type = db.cfg.recordType
record.Id = id
record.Data = data
record.Version = formatVersion(version)
key, field := getHashKey(record.GetType(), record.GetId())
if record.DeletedAt != nil {
p.HDel(ctx, key, field)
} else {
p.HSet(ctx, key, field, bs)
}
p.ZAdd(ctx, changesSetKey, &redis.Z{
Score: float64(version),
Member: bs,
})
return nil
},
func(p redis.Pipeliner, version int64) error {
return redisSetRecord(ctx, p, db.cfg.recordType, record)
})
return err
}
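// Illustrative sketch, not part of this diff: the Delete method is removed in
// this change, so a caller marks a record deleted by calling Put with
// DeletedAt set. As the branch above shows, Put then deletes the field from
// the record hash but still appends the marshaled tombstone to the
// "pomerium.changes" sorted set, so the deletion is versioned like any other
// change. markDeleted is a hypothetical helper name.
func markDeleted(ctx context.Context, backend *Backend, recordType, id string) error {
	return backend.Put(ctx, &databroker.Record{
		Type:      recordType,
		Id:        id,
		DeletedAt: timestamppb.Now(),
	})
}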
// Watch returns a channel that is signaled any time the last version is incremented (ie on Put/Delete).
func (db *DB) Watch(ctx context.Context) <-chan struct{} {
s := signal.New()
ch := s.Bind()
go func() {
defer s.Unbind(ch)
defer close(ch)
// force a check
poll := time.NewTicker(watchPollInterval)
defer poll.Stop()
// use pub/sub for quicker notify
pubsub := db.client.Subscribe(ctx, formatLastVersionChannelKey(db.cfg.recordType))
defer func() { _ = pubsub.Close() }()
pubsubCh := pubsub.Channel()
var lastVersion int64
for {
v, err := redisGetLastVersion(ctx, db.client, db.cfg.recordType)
if err != nil {
log.Error().Err(err).Msg("redis: error retrieving last version")
} else if v != lastVersion {
// don't broadcast the first time
if lastVersion != 0 {
s.Broadcast()
}
lastVersion = v
}
select {
case <-ctx.Done():
return
case <-db.closed:
return
case <-poll.C:
case <-pubsubCh:
// re-check
}
}
}()
return ch
// Sync returns a record stream of any records changed after the specified version.
func (backend *Backend) Sync(ctx context.Context, version uint64) (storage.RecordStream, error) {
return newRecordStream(ctx, backend, version), nil
}
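// Illustrative sketch, not part of this diff: the version returned by GetAll
// pairs with Sync. A consumer would presumably seed its local state from
// GetAll, keep the returned version, and then stream every later change
// starting from that version. seedAndSync is a hypothetical helper name.
func seedAndSync(ctx context.Context, backend *Backend) ([]*databroker.Record, storage.RecordStream, error) {
	records, version, err := backend.GetAll(ctx)
	if err != nil {
		return nil, nil, err
	}
	stream, err := backend.Sync(ctx, version)
	if err != nil {
		return nil, nil, err
	}
	return records, stream, nil
}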
// incrementVersion increments the last version key, runs the code in `query`, then attempts to commit the code in
// `commit`. If the last version changes in the interim, we will retry the transaction.
func (db *DB) incrementVersion(ctx context.Context,
query func(tx *redis.Tx, version int64) error,
commit func(p redis.Pipeliner, version int64) error,
func (backend *Backend) incrementVersion(ctx context.Context,
query func(tx *redis.Tx, version uint64) error,
commit func(p redis.Pipeliner, version uint64) error,
) error {
// code is modeled on https://pkg.go.dev/github.com/go-redis/redis/v8#example-Client.Watch
txf := func(tx *redis.Tx) error {
version, err := redisGetLastVersion(ctx, tx, db.cfg.recordType)
if err != nil {
version, err := tx.Get(ctx, lastVersionKey).Uint64()
if errors.Is(err, redis.Nil) {
version = 0
} else if err != nil {
return err
}
version++
@@ -260,16 +220,23 @@ func (db *DB) incrementVersion(ctx context.Context,
if err != nil {
return err
}
p.Set(ctx, formatLastVersionKey(db.cfg.recordType), version, 0)
p.Publish(ctx, formatLastVersionChannelKey(db.cfg.recordType), version)
p.Set(ctx, lastVersionKey, version, 0)
p.Publish(ctx, lastVersionChKey, version)
return nil
})
return err
}
bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = 0
for i := 0; i < maxTransactionRetries; i++ {
err := db.client.Watch(ctx, txf, formatLastVersionKey(db.cfg.recordType))
err := backend.client.Watch(ctx, txf, lastVersionKey)
if errors.Is(err, redis.TxFailedErr) {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(bo.NextBackOff()):
}
continue // retry
} else if err != nil {
return err
@@ -281,121 +248,81 @@ func (db *DB) incrementVersion(ctx context.Context,
return ErrExceededMaxRetries
}
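// Illustrative sketch, not part of this diff: how a write path plugs into
// incrementVersion. The first callback runs inside the WATCHed transaction
// (use tx for reads); the second queues writes on the pipeline. The helper
// itself bumps "pomerium.last_version", publishes on "pomerium.last_version_ch",
// and retries with exponential backoff when the watched key changes
// concurrently. putRaw and its arguments are hypothetical.
func putRaw(ctx context.Context, backend *Backend, field string, data []byte) error {
	return backend.incrementVersion(ctx,
		func(tx *redis.Tx, version uint64) error {
			// a real caller could read and validate existing state here
			return nil
		},
		func(p redis.Pipeliner, version uint64) error {
			p.HSet(ctx, recordHashKey, field, data)
			p.ZAdd(ctx, changesSetKey, &redis.Z{Score: float64(version), Member: data})
			return nil
		})
}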
func redisGetLastVersion(ctx context.Context, c redis.Cmdable, recordType string) (int64, error) {
version, err := c.Get(ctx, formatLastVersionKey(recordType)).Int64()
if errors.Is(err, redis.Nil) {
version = 0
} else if err != nil {
return 0, err
func (backend *Backend) listenForVersionChanges() {
ctx, cancel := context.WithCancel(context.Background())
go func() {
<-backend.closed
cancel()
}()
bo := backoff.NewExponentialBackOff()
bo.MaxElapsedTime = 0
outer:
for {
pubsub := backend.client.Subscribe(ctx, lastVersionChKey)
for {
msg, err := pubsub.Receive(ctx)
if err != nil {
_ = pubsub.Close()
select {
case <-ctx.Done():
return
case <-time.After(bo.NextBackOff()):
}
continue outer
}
bo.Reset()
switch msg.(type) {
case *redis.Message:
backend.onChange.Broadcast()
}
}
}
return version, nil
}
func redisGetRecord(ctx context.Context, c redis.Cmdable, recordType string, id string) (*databroker.Record, error) {
records, err := redisGetRecords(ctx, c, recordType, []string{id})
if err != nil {
return nil, err
} else if len(records) < 1 {
return nil, redis.Nil
}
return records[0], nil
}
func redisGetRecords(ctx context.Context, c redis.Cmdable, recordType string, ids []string) ([]*databroker.Record, error) {
if len(ids) == 0 {
return nil, nil
}
results, err := c.HMGet(ctx, formatRecordsKey(recordType), ids...).Result()
if err != nil {
return nil, err
}
records := make([]*databroker.Record, 0, len(results))
for _, result := range results {
// results are returned as either nil or a string
if result == nil {
continue
}
rawstr, ok := result.(string)
if !ok {
continue
}
var record databroker.Record
err := proto.Unmarshal([]byte(rawstr), &record)
func (backend *Backend) removeChangesBefore(cutoff time.Time) {
ctx := context.Background()
for {
cmd := backend.client.ZRangeByScore(ctx, changesSetKey, &redis.ZRangeBy{
Min: "-inf",
Max: "+inf",
Offset: 0,
Count: 1,
})
results, err := cmd.Result()
if err != nil {
continue
log.Error().Err(err).Msg("redis: error retrieving changes for expiration")
return
}
// nothing left to do
if len(results) == 0 {
return
}
var record databroker.Record
err = proto.Unmarshal([]byte(results[0]), &record)
if err != nil {
log.Warn().Err(err).Msg("redis: invalid record detected")
record.ModifiedAt = timestamppb.New(cutoff.Add(-time.Second)) // force the modified time before the cutoff so the invalid entry is removed below
}
// if the record's modified timestamp is after the cutoff, we're all done, so break
if record.GetModifiedAt().AsTime().After(cutoff) {
break
}
// remove the record
err = backend.client.ZRem(ctx, changesSetKey, results[0]).Err()
if err != nil {
log.Error().Err(err).Msg("redis: error removing member")
return
}
records = append(records, &record)
}
return records, nil
}
func redisListIDsSince(ctx context.Context,
c redis.Cmdable, recordType string,
sinceVersion string,
) ([]string, error) {
v, err := strconv.ParseInt(sinceVersion, 16, 64)
if err != nil {
v = 0
}
rng := &redis.ZRangeBy{
Min: fmt.Sprintf("(%d", v),
Max: "+inf",
}
return c.ZRangeByScore(ctx, formatVersionSetKey(recordType), rng).Result()
}
func redisSetRecord(ctx context.Context, p redis.Pipeliner, recordType string, record *databroker.Record) error {
v, err := strconv.ParseInt(record.GetVersion(), 16, 64)
if err != nil {
v = 0
}
raw, err := proto.Marshal(record)
if err != nil {
return err
}
// store the record in the hash
p.HSet(ctx, formatRecordsKey(recordType), record.GetId(), string(raw))
// set its score for sorting by version
p.ZAdd(ctx, formatVersionSetKey(recordType), &redis.Z{
Score: float64(v),
Member: record.GetId(),
})
return nil
}
func formatDeletedSetKey(recordType string) string {
return fmt.Sprintf("%s_deleted_set", recordType)
}
func formatLastVersionChannelKey(recordType string) string {
return fmt.Sprintf("%s_last_version_ch", recordType)
}
func formatLastVersionKey(recordType string) string {
return fmt.Sprintf("%s_last_version", recordType)
}
func formatRecordsKey(recordType string) string {
return recordType
}
func formatVersion(version int64) string {
return fmt.Sprintf("%012d", version)
}
func formatVersionSetKey(recordType string) string {
return fmt.Sprintf("%s_version_set", recordType)
}
func recordOperation(ctx context.Context, startTime time.Time, operation string, err error) {
metrics.RecordStorageOperation(ctx, &metrics.StorageOperationTags{
Operation: operation,
Error: err,
Backend: Name,
}, time.Since(startTime))
func getHashKey(recordType, id string) (key, field string) {
return recordHashKey, fmt.Sprintf("%s/%s", recordType, id)
}
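
Below is a minimal, hypothetical usage sketch of the refactored backend, assuming a reachable redis instance at the URL shown and a placeholder type URL; it only exercises the signatures visible in this diff (New, Put, Get, GetAll, Close). Records live as fields of the "pomerium.records" hash keyed by "<type>/<id>" (see getHashKey), and each change is additionally scored by version in the "pomerium.changes" sorted set.

func exampleRoundTrip(ctx context.Context) error {
	backend, err := New("redis://localhost:6379/0")
	if err != nil {
		return err
	}
	defer backend.Close()

	// "type.googleapis.com/user.User" and "u-1234" are placeholder values.
	record := &databroker.Record{
		Type: "type.googleapis.com/user.User",
		Id:   "u-1234",
	}
	if err := backend.Put(ctx, record); err != nil {
		return err
	}

	// read the record back by type and id
	got, err := backend.Get(ctx, record.GetType(), record.GetId())
	if err != nil {
		return err
	}

	// GetAll also reports the latest record version, which can be fed to Sync
	all, latestVersion, err := backend.GetAll(ctx)
	if err != nil {
		return err
	}
	_, _, _ = got, all, latestVersion
	return nil
}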