diff --git a/go.mod b/go.mod index 4510a9879..7a1541fb8 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( contrib.go.opencensus.io/exporter/prometheus v0.3.0 contrib.go.opencensus.io/exporter/zipkin v0.1.2 github.com/DataDog/opencensus-go-exporter-datadog v0.0.0-20200406135749-5c268882acf0 + github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46 // indirect github.com/btcsuite/btcutil v1.0.2 github.com/caddyserver/certmagic v0.13.1 github.com/cenkalti/backoff/v4 v4.1.0 @@ -16,6 +17,7 @@ require ( github.com/envoyproxy/protoc-gen-validate v0.6.1 github.com/fsnotify/fsnotify v1.4.9 github.com/go-chi/chi v1.5.4 + github.com/go-ole/go-ole v1.2.5 // indirect github.com/go-redis/redis/v8 v8.8.3 github.com/golang/mock v1.5.0 github.com/golang/protobuf v1.5.2 @@ -50,6 +52,7 @@ require ( github.com/rs/cors v1.7.0 github.com/rs/zerolog v1.22.0 github.com/scylladb/go-set v1.0.2 + github.com/shirou/gopsutil v3.21.4+incompatible github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 github.com/spf13/afero v1.5.1 // indirect github.com/spf13/cast v1.3.1 // indirect @@ -57,6 +60,7 @@ require ( github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/viper v1.7.1 github.com/stretchr/testify v1.7.0 + github.com/tklauser/go-sysconf v0.3.6 // indirect github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 github.com/yuin/gopher-lua v0.0.0-20200816102855-ee81675732da go.opencensus.io v0.23.0 diff --git a/go.sum b/go.sum index 455ff69fb..0b5db233e 100644 --- a/go.sum +++ b/go.sum @@ -65,6 +65,8 @@ github.com/PuerkitoBio/rehttp v1.0.0 h1:aJ7A7YI2lIvOxcJVeUZY4P6R7kKZtLeONjgyKGwO github.com/PuerkitoBio/rehttp v1.0.0/go.mod h1:ItsOiHl4XeMOV3rzbZqQRjLc3QQxbE6391/9iNG7rE8= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= +github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46 h1:5sXbqlSomvdjlRbWyNqkPsJ3Fg+tQZCbgeX1VGljbQY= +github.com/StackExchange/wmi v0.0.0-20210224194228-fe8f1750fd46/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg= github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= @@ -203,6 +205,8 @@ github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgO github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY= +github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-redis/redis/v8 v8.8.3 h1:BefJyU89cTF25I00D5N9pJdWB1d1RBj8d7MBf71M7uQ= github.com/go-redis/redis/v8 v8.8.3/go.mod h1:ik7vb7+gm8Izylxu6kf6wG26/t2VljgCfSQ1DM4O1uU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= @@ -567,6 +571,8 @@ github.com/scylladb/go-set v1.0.2/go.mod h1:DkpGd78rljTxKAnTDPFqXSGxvETQnJyuSOQw github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= +github.com/shirou/gopsutil v3.21.4+incompatible h1:fuHcTm5mX+wzo542cmYcV9RTGQLbnHLI5SyQ5ryTVck= +github.com/shirou/gopsutil v3.21.4+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q= @@ -620,6 +626,10 @@ github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ= github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= +github.com/tklauser/go-sysconf v0.3.6 h1:oc1sJWvKkmvIxhDHeKWvZS4f6AW+YcoguSfRF2/Hmo4= +github.com/tklauser/go-sysconf v0.3.6/go.mod h1:MkWzOF4RMCshBAMXuhXJs64Rte09mITnppBXY/rYEFI= +github.com/tklauser/numcpus v0.2.2 h1:oyhllyrScuYI6g+h/zUvNXNp1wy7x8qQy3t/piefldA= +github.com/tklauser/numcpus v0.2.2/go.mod h1:x3qojaO3uyYt0i56EW/VUYs7uBvdl2fkfZFu0T9wgjM= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y= @@ -828,6 +838,7 @@ golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -867,6 +878,7 @@ golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210316164454-77fc1eacc6aa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/envoy/envoy.go b/internal/envoy/envoy.go index 2471f3fe2..215aa9bd4 100644 --- a/internal/envoy/envoy.go +++ b/internal/envoy/envoy.go @@ -15,7 +15,6 @@ import ( "os/exec" "path/filepath" "regexp" - "runtime" "strconv" "strings" "sync" @@ -28,13 +27,12 @@ import ( "github.com/google/go-cmp/cmp" "github.com/natefinch/atomic" "github.com/rs/zerolog" - "go.opencensus.io/stats/view" + "github.com/shirou/gopsutil/process" "google.golang.org/protobuf/encoding/protojson" "github.com/pomerium/pomerium/config" "github.com/pomerium/pomerium/config/envoyconfig" "github.com/pomerium/pomerium/internal/log" - "github.com/pomerium/pomerium/internal/telemetry/metrics" "github.com/pomerium/pomerium/internal/telemetry/trace" ) @@ -62,6 +60,8 @@ type Server struct { envoyPath string restartEpoch int + monitorProcessCancel context.CancelFunc + mu sync.Mutex options serverOptions } @@ -107,6 +107,8 @@ func NewServer(ctx context.Context, src config.Source, grpcPort, httpPort string grpcPort: grpcPort, httpPort: httpPort, envoyPath: envoyPath, + + monitorProcessCancel: func() {}, } go srv.runProcessCollector(ctx) @@ -164,19 +166,22 @@ func (srv *Server) update(ctx context.Context, cfg *config.Config) { } srv.options = options - if err := srv.writeConfig(ctx, cfg); err != nil { - log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to write envoy config") - return - } - log.Info(ctx).Msg("envoy: starting envoy process") - if err := srv.run(ctx); err != nil { + if err := srv.run(ctx, cfg); err != nil { log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to run envoy process") return } } -func (srv *Server) run(ctx context.Context) error { +func (srv *Server) run(ctx context.Context, cfg *config.Config) error { + // cancel any process monitor since we will be killing the previous process + srv.monitorProcessCancel() + + if err := srv.writeConfig(ctx, cfg); err != nil { + log.Error(ctx).Err(err).Str("service", "envoy").Msg("envoy: failed to write envoy config") + return err + } + args := []string{ "-c", configFileName, "--log-level", srv.options.logLevel, @@ -186,10 +191,10 @@ func (srv *Server) run(ctx context.Context) error { if baseID, ok := readBaseID(); ok { args = append(args, "--base-id", strconv.Itoa(baseID), "--restart-epoch", strconv.Itoa(srv.restartEpoch)) - srv.restartEpoch++ // start with epoch zero when we're a fresh pomerium process } else { args = append(args, "--use-dynamic-base-id", "--base-id-path", baseIDPath) } + srv.restartEpoch++ cmd := exec.Command(srv.envoyPath, args...) // #nosec cmd.Dir = srv.wd @@ -213,6 +218,13 @@ func (srv *Server) run(ctx context.Context) error { if err != nil { return fmt.Errorf("error starting envoy: %w", err) } + // call Wait to avoid zombie processes + go func() { _ = cmd.Wait() }() + + // monitor the process so we exit if it prematurely exits + var monitorProcessCtx context.Context + monitorProcessCtx, srv.monitorProcessCancel = context.WithCancel(context.Background()) + go srv.monitorProcess(monitorProcessCtx, int32(cmd.Process.Pid)) // release the previous process so we can hot-reload if srv.cmd != nil && srv.cmd.Process != nil { @@ -362,34 +374,38 @@ func (srv *Server) handleLogs(ctx context.Context, rc io.ReadCloser) { } } -func (srv *Server) runProcessCollector(ctx context.Context) { - // macos is not supported - if runtime.GOOS != "linux" { - return +func (srv *Server) monitorProcess(ctx context.Context, pid int32) { + log.Info(ctx). + Int32("pid", pid). + Msg("envoy: start monitoring subprocess") + + proc, err := process.NewProcessWithContext(ctx, pid) + if err != nil { + log.Fatal().Err(err). + Int32("pid", pid). + Msg("envoy: error retrieving subprocess information") } - pc := metrics.NewProcessCollector("envoy") - if err := view.Register(pc.Views()...); err != nil { - log.Error(ctx).Err(err).Msg("failed to register envoy process metric views") - } - - const collectInterval = time.Second * 10 - ticker := time.NewTicker(collectInterval) + ticker := time.NewTicker(time.Second) defer ticker.Stop() - for range ticker.C { - var pid int - srv.mu.Lock() - if srv.cmd != nil && srv.cmd.Process != nil { - pid = srv.cmd.Process.Pid + for { + // wait for the next tick + select { + case <-ctx.Done(): + return + case <-ticker.C: } - srv.mu.Unlock() - if pid > 0 { - err := pc.Measure(context.Background(), pid) - if err != nil { - log.Error(ctx).Err(err).Msg("failed to measure envoy process metrics") - } + running, err := proc.IsRunningWithContext(ctx) + if err != nil { + log.Fatal().Err(err). + Int32("pid", pid). + Msg("envoy: error retrieving subprocess status") + } else if !running { + log.Fatal().Err(err). + Int32("pid", pid). + Msg("envoy: subprocess exited") } } } diff --git a/internal/envoy/envoy_darwin.go b/internal/envoy/envoy_darwin.go new file mode 100644 index 000000000..1d6010dea --- /dev/null +++ b/internal/envoy/envoy_darwin.go @@ -0,0 +1,14 @@ +// +build darwin + +package envoy + +import ( + "context" + "syscall" +) + +var sysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, +} + +func (srv *Server) runProcessCollector(ctx context.Context) {} diff --git a/internal/envoy/envoy_linux.go b/internal/envoy/envoy_linux.go index 5a03e3f72..e95bade06 100644 --- a/internal/envoy/envoy_linux.go +++ b/internal/envoy/envoy_linux.go @@ -2,9 +2,51 @@ package envoy -import "syscall" +import ( + "context" + "syscall" + "time" + + "go.opencensus.io/stats/view" + + "github.com/pomerium/pomerium/internal/log" + "github.com/pomerium/pomerium/internal/telemetry/metrics" +) var sysProcAttr = &syscall.SysProcAttr{ Setpgid: true, Pdeathsig: syscall.SIGTERM, } + +func (srv *Server) runProcessCollector(ctx context.Context) { + pc := metrics.NewProcessCollector("envoy") + if err := view.Register(pc.Views()...); err != nil { + log.Error(ctx).Err(err).Msg("failed to register envoy process metric views") + } + + const collectInterval = time.Second * 10 + ticker := time.NewTicker(collectInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + } + + var pid int + srv.mu.Lock() + if srv.cmd != nil && srv.cmd.Process != nil { + pid = srv.cmd.Process.Pid + } + srv.mu.Unlock() + + if pid > 0 { + err := pc.Measure(ctx, pid) + if err != nil { + log.Error(ctx).Err(err).Msg("failed to measure envoy process metrics") + } + } + } +} diff --git a/internal/envoy/envoy_notlinux.go b/internal/envoy/envoy_notlinux.go deleted file mode 100644 index e3ab7ba34..000000000 --- a/internal/envoy/envoy_notlinux.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !linux - -package envoy - -import "syscall" - -var sysProcAttr = &syscall.SysProcAttr{ - Setpgid: true, -} diff --git a/internal/envoy/envoy_other.go b/internal/envoy/envoy_other.go new file mode 100644 index 000000000..ac42290c3 --- /dev/null +++ b/internal/envoy/envoy_other.go @@ -0,0 +1,9 @@ +// +build !linux,!darwin + +package envoy + +import ( + "context" +) + +func (srv *Server) runProcessCollector(ctx context.Context) {}