envoy: exit if envoy exits (#2240)

This commit is contained in:
Caleb Doxsey 2021-05-27 15:11:12 -06:00 committed by GitHub
parent 1cf0c701ba
commit ef62d9bb31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 131 additions and 43 deletions

4
go.mod
View file

@ -7,6 +7,7 @@ require (
contrib.go.opencensus.io/exporter/prometheus v0.3.0
contrib.go.opencensus.io/exporter/zipkin v0.1.2
github.com/DataDog/opencensus-go-exporter-datadog v0.0.0-20200406135749-5c268882acf0
github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46 // indirect
github.com/btcsuite/btcutil v1.0.2
github.com/caddyserver/certmagic v0.13.1
github.com/cenkalti/backoff/v4 v4.1.0
@ -16,6 +17,7 @@ require (
github.com/envoyproxy/protoc-gen-validate v0.6.1
github.com/fsnotify/fsnotify v1.4.9
github.com/go-chi/chi v1.5.4
github.com/go-ole/go-ole v1.2.5 // indirect
github.com/go-redis/redis/v8 v8.8.3
github.com/golang/mock v1.5.0
github.com/golang/protobuf v1.5.2
@ -50,6 +52,7 @@ require (
github.com/rs/cors v1.7.0
github.com/rs/zerolog v1.22.0
github.com/scylladb/go-set v1.0.2
github.com/shirou/gopsutil v3.21.4+incompatible
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966
github.com/spf13/afero v1.5.1 // indirect
github.com/spf13/cast v1.3.1 // indirect
@ -57,6 +60,7 @@ require (
github.com/spf13/jwalterweatherman v1.1.0 // indirect
github.com/spf13/viper v1.7.1
github.com/stretchr/testify v1.7.0
github.com/tklauser/go-sysconf v0.3.6 // indirect
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80
github.com/yuin/gopher-lua v0.0.0-20200816102855-ee81675732da
go.opencensus.io v0.23.0

12
go.sum
View file

@ -65,6 +65,8 @@ github.com/PuerkitoBio/rehttp v1.0.0 h1:aJ7A7YI2lIvOxcJVeUZY4P6R7kKZtLeONjgyKGwO
github.com/PuerkitoBio/rehttp v1.0.0/go.mod h1:ItsOiHl4XeMOV3rzbZqQRjLc3QQxbE6391/9iNG7rE8=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46 h1:5sXbqlSomvdjlRbWyNqkPsJ3Fg+tQZCbgeX1VGljbQY=
github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g=
github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII=
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
@ -203,6 +205,8 @@ github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgO
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-redis/redis/v8 v8.8.3 h1:BefJyU89cTF25I00D5N9pJdWB1d1RBj8d7MBf71M7uQ=
github.com/go-redis/redis/v8 v8.8.3/go.mod h1:ik7vb7+gm8Izylxu6kf6wG26/t2VljgCfSQ1DM4O1uU=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
@ -567,6 +571,8 @@ github.com/scylladb/go-set v1.0.2/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQw
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shirou/gopsutil v3.21.4+incompatible h1:fuHcTm5mX+wzo542cmYcV9RTGQLbnHLI5SyQ5ryTVck=
github.com/shirou/gopsutil v3.21.4+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
@ -620,6 +626,10 @@ github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tklauser/go-sysconf v0.3.6 h1:oc1sJWvKkmvIxhDHeKWvZS4f6AW+YcoguSfRF2/Hmo4=
github.com/tklauser/go-sysconf v0.3.6/go.mod h1:MkWzOF4RMCshBAMXuhXJs64Rte09mITnppBXY/rYEFI=
github.com/tklauser/numcpus v0.2.2 h1:oyhllyrScuYI6g+h/zUvNXNp1wy7x8qQy3t/piefldA=
github.com/tklauser/numcpus v0.2.2/go.mod h1:x3qojaO3uyYt0i56EW/VUYs7uBvdl2fkfZFu0T9wgjM=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y=
@ -828,6 +838,7 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -867,6 +878,7 @@ golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210316164454-77fc1eacc6aa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

View file

@ -15,7 +15,6 @@ import (
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
@ -28,13 +27,12 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/natefinch/atomic"
"github.com/rs/zerolog"
"go.opencensus.io/stats/view"
"github.com/shirou/gopsutil/process"
"google.golang.org/protobuf/encoding/protojson"
"github.com/pomerium/pomerium/config"
"github.com/pomerium/pomerium/config/envoyconfig"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
"github.com/pomerium/pomerium/internal/telemetry/trace"
)
@ -62,6 +60,8 @@ type Server struct {
envoyPath string
restartEpoch int
monitorProcessCancel context.CancelFunc
mu sync.Mutex
options serverOptions
}
@ -107,6 +107,8 @@ func NewServer(ctx context.Context, src config.Source, grpcPort, httpPort string
grpcPort: grpcPort,
httpPort: httpPort,
envoyPath: envoyPath,
monitorProcessCancel: func() {},
}
go srv.runProcessCollector(ctx)
@ -164,19 +166,22 @@ func (srv *Server) update(ctx context.Context, cfg *config.Config) {
}
srv.options = options
if err := srv.writeConfig(ctx, cfg); err != nil {
log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to write envoy config")
return
}
log.Info(ctx).Msg("envoy: starting envoy process")
if err := srv.run(ctx); err != nil {
if err := srv.run(ctx, cfg); err != nil {
log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to run envoy process")
return
}
}
func (srv *Server) run(ctx context.Context) error {
func (srv *Server) run(ctx context.Context, cfg *config.Config) error {
// cancel any process monitor since we will be killing the previous process
srv.monitorProcessCancel()
if err := srv.writeConfig(ctx, cfg); err != nil {
log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to write envoy config")
return err
}
args := []string{
"-c", configFileName,
"--log-level", srv.options.logLevel,
@ -186,10 +191,10 @@ func (srv *Server) run(ctx context.Context) error {
if baseID, ok := readBaseID(); ok {
args = append(args, "--base-id", strconv.Itoa(baseID), "--restart-epoch", strconv.Itoa(srv.restartEpoch))
srv.restartEpoch++ // start with epoch zero when we're a fresh pomerium process
} else {
args = append(args, "--use-dynamic-base-id", "--base-id-path", baseIDPath)
}
srv.restartEpoch++
cmd := exec.Command(srv.envoyPath, args...) // #nosec
cmd.Dir = srv.wd
@ -213,6 +218,13 @@ func (srv *Server) run(ctx context.Context) error {
if err != nil {
return fmt.Errorf("error starting envoy: %w", err)
}
// call Wait to avoid zombie processes
go func() { _ = cmd.Wait() }()
// monitor the process so we exit if it prematurely exits
var monitorProcessCtx context.Context
monitorProcessCtx, srv.monitorProcessCancel = context.WithCancel(context.Background())
go srv.monitorProcess(monitorProcessCtx, int32(cmd.Process.Pid))
// release the previous process so we can hot-reload
if srv.cmd != nil && srv.cmd.Process != nil {
@ -362,34 +374,38 @@ func (srv *Server) handleLogs(ctx context.Context, rc io.ReadCloser) {
}
}
func (srv *Server) runProcessCollector(ctx context.Context) {
// macos is not supported
if runtime.GOOS != "linux" {
return
func (srv *Server) monitorProcess(ctx context.Context, pid int32) {
log.Info(ctx).
Int32("pid", pid).
Msg("envoy: start monitoring subprocess")
proc, err := process.NewProcessWithContext(ctx, pid)
if err != nil {
log.Fatal().Err(err).
Int32("pid", pid).
Msg("envoy: error retrieving subprocess information")
}
pc := metrics.NewProcessCollector("envoy")
if err := view.Register(pc.Views()...); err != nil {
log.Error(ctx).Err(err).Msg("failed to register envoy process metric views")
}
const collectInterval = time.Second * 10
ticker := time.NewTicker(collectInterval)
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for range ticker.C {
var pid int
srv.mu.Lock()
if srv.cmd != nil && srv.cmd.Process != nil {
pid = srv.cmd.Process.Pid
for {
// wait for the next tick
select {
case <-ctx.Done():
return
case <-ticker.C:
}
srv.mu.Unlock()
if pid > 0 {
err := pc.Measure(context.Background(), pid)
if err != nil {
log.Error(ctx).Err(err).Msg("failed to measure envoy process metrics")
}
running, err := proc.IsRunningWithContext(ctx)
if err != nil {
log.Fatal().Err(err).
Int32("pid", pid).
Msg("envoy: error retrieving subprocess status")
} else if !running {
log.Fatal().Err(err).
Int32("pid", pid).
Msg("envoy: subprocess exited")
}
}
}

View file

@ -0,0 +1,14 @@
// +build darwin
package envoy
import (
"context"
"syscall"
)
var sysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
func (srv *Server) runProcessCollector(ctx context.Context) {}

View file

@ -2,9 +2,51 @@
package envoy
import "syscall"
import (
"context"
"syscall"
"time"
"go.opencensus.io/stats/view"
"github.com/pomerium/pomerium/internal/log"
"github.com/pomerium/pomerium/internal/telemetry/metrics"
)
var sysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
Pdeathsig: syscall.SIGTERM,
}
func (srv *Server) runProcessCollector(ctx context.Context) {
pc := metrics.NewProcessCollector("envoy")
if err := view.Register(pc.Views()...); err != nil {
log.Error(ctx).Err(err).Msg("failed to register envoy process metric views")
}
const collectInterval = time.Second * 10
ticker := time.NewTicker(collectInterval)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
}
var pid int
srv.mu.Lock()
if srv.cmd != nil && srv.cmd.Process != nil {
pid = srv.cmd.Process.Pid
}
srv.mu.Unlock()
if pid > 0 {
err := pc.Measure(ctx, pid)
if err != nil {
log.Error(ctx).Err(err).Msg("failed to measure envoy process metrics")
}
}
}
}

View file

@ -1,9 +0,0 @@
// +build !linux
package envoy
import "syscall"
var sysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}

View file

@ -0,0 +1,9 @@
// +build !linux,!darwin
package envoy
import (
"context"
)
func (srv *Server) runProcessCollector(ctx context.Context) {}