From 9ef1e20ea2e9628edc52abdae1983992975aae54 Mon Sep 17 00:00:00 2001 From: Spike Curtis Date: Mon, 30 Jun 2025 19:10:16 +0000 Subject: [PATCH] fix: stop tearing down non-TTY processes on SSH session end (#18673) (possibly temporary) fix for #18519 Matches OpenSSH for non-tty sessions, where we don't actively terminate the process. Adds explicit tracking to the SSH server for these processes so that if we are shutting down we terminate them: this ensures that we can shut down quickly to allow shutdown scripts to run. It also ensures our tests don't leak system resources. --- agent/agentssh/agentssh.go | 42 +++++++++++++++++++++++++++++++++- agent/agentssh/exec_other.go | 10 ++++---- agent/agentssh/exec_windows.go | 20 ++++++++-------- 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/agent/agentssh/agentssh.go b/agent/agentssh/agentssh.go index 293dd4db169ac..b802ede9c93e7 100644 --- a/agent/agentssh/agentssh.go +++ b/agent/agentssh/agentssh.go @@ -124,6 +124,7 @@ type Server struct { listeners map[net.Listener]struct{} conns map[net.Conn]struct{} sessions map[ssh.Session]struct{} + processes map[*os.Process]struct{} closing chan struct{} // Wait for goroutines to exit, waited without // a lock on mu but protected by closing. @@ -182,6 +183,7 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom fs: fs, conns: make(map[net.Conn]struct{}), sessions: make(map[ssh.Session]struct{}), + processes: make(map[*os.Process]struct{}), logger: logger, config: config, @@ -586,7 +588,10 @@ func (s *Server) startNonPTYSession(logger slog.Logger, session ssh.Session, mag // otherwise context cancellation will not propagate properly // and SSH server close may be delayed. cmd.SysProcAttr = cmdSysProcAttr() - cmd.Cancel = cmdCancel(session.Context(), logger, cmd) + + // to match OpenSSH, we don't actually tear a non-TTY command down, even if the session ends. + // c.f. https://github.com/coder/coder/issues/18519#issuecomment-3019118271 + cmd.Cancel = nil cmd.Stdout = session cmd.Stderr = session.Stderr() @@ -609,6 +614,16 @@ func (s *Server) startNonPTYSession(logger slog.Logger, session ssh.Session, mag s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "no", "start_command").Add(1) return xerrors.Errorf("start: %w", err) } + + // Since we don't cancel the process when the session stops, we still need to tear it down if we are closing. So + // track it here. + if !s.trackProcess(cmd.Process, true) { + // must be closing + err = cmdCancel(logger, cmd.Process) + return xerrors.Errorf("failed to track process: %w", err) + } + defer s.trackProcess(cmd.Process, false) + sigs := make(chan ssh.Signal, 1) session.Signals(sigs) defer func() { @@ -1052,6 +1067,27 @@ func (s *Server) trackSession(ss ssh.Session, add bool) (ok bool) { return true } +// trackCommand registers the process with the server. If the server is +// closing, the process is not registered and should be closed. +// +//nolint:revive +func (s *Server) trackProcess(p *os.Process, add bool) (ok bool) { + s.mu.Lock() + defer s.mu.Unlock() + if add { + if s.closing != nil { + // Server closed. + return false + } + s.wg.Add(1) + s.processes[p] = struct{}{} + return true + } + s.wg.Done() + delete(s.processes, p) + return true +} + // Close the server and all active connections. Server can be re-used // after Close is done. func (s *Server) Close() error { @@ -1091,6 +1127,10 @@ func (s *Server) Close() error { _ = c.Close() } + for p := range s.processes { + _ = cmdCancel(s.logger, p) + } + s.logger.Debug(ctx, "closing SSH server") err := s.srv.Close() diff --git a/agent/agentssh/exec_other.go b/agent/agentssh/exec_other.go index 54dfd50899412..aef496a1ef775 100644 --- a/agent/agentssh/exec_other.go +++ b/agent/agentssh/exec_other.go @@ -4,7 +4,7 @@ package agentssh import ( "context" - "os/exec" + "os" "syscall" "cdr.dev/slog" @@ -16,9 +16,7 @@ func cmdSysProcAttr() *syscall.SysProcAttr { } } -func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error { - return func() error { - logger.Debug(ctx, "cmdCancel: sending SIGHUP to process and children", slog.F("pid", cmd.Process.Pid)) - return syscall.Kill(-cmd.Process.Pid, syscall.SIGHUP) - } +func cmdCancel(logger slog.Logger, p *os.Process) error { + logger.Debug(context.Background(), "cmdCancel: sending SIGHUP to process and children", slog.F("pid", p.Pid)) + return syscall.Kill(-p.Pid, syscall.SIGHUP) } diff --git a/agent/agentssh/exec_windows.go b/agent/agentssh/exec_windows.go index 39f0f97198479..0dafa67958a67 100644 --- a/agent/agentssh/exec_windows.go +++ b/agent/agentssh/exec_windows.go @@ -2,7 +2,7 @@ package agentssh import ( "context" - "os/exec" + "os" "syscall" "cdr.dev/slog" @@ -12,14 +12,12 @@ func cmdSysProcAttr() *syscall.SysProcAttr { return &syscall.SysProcAttr{} } -func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error { - return func() error { - logger.Debug(ctx, "cmdCancel: killing process", slog.F("pid", cmd.Process.Pid)) - // Windows doesn't support sending signals to process groups, so we - // have to kill the process directly. In the future, we may want to - // implement a more sophisticated solution for process groups on - // Windows, but for now, this is a simple way to ensure that the - // process is terminated when the context is cancelled. - return cmd.Process.Kill() - } +func cmdCancel(logger slog.Logger, p *os.Process) error { + logger.Debug(context.Background(), "cmdCancel: killing process", slog.F("pid", p.Pid)) + // Windows doesn't support sending signals to process groups, so we + // have to kill the process directly. In the future, we may want to + // implement a more sophisticated solution for process groups on + // Windows, but for now, this is a simple way to ensure that the + // process is terminated when the context is cancelled. + return p.Kill() } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy