Terminate the local shell when a session closes #19817 (#19817)

When a client sends an SSH session before the shell process ends, Teleport sends the SIGKILL signal to the subprocess. This causes the still-leaving shell to be resigned to PID 1 and live forever.
This PR tried to stop the terminal first before killing the subprocess to prevent the terminal from living forever.

Closes #13335
Closes #6245
Closes #4469
This commit is contained in:
Jakub Nyckowski 2023-02-24 08:56:11 -05:00 committed by GitHub
parent 2ffbd01944
commit 7ccf2f219a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 237 additions and 56 deletions

View file

@ -346,6 +346,11 @@ type ServerContext struct {
contr *os.File
contw *os.File
// killShell{r,w} are used to send kill signal to the child process
// to terminate the shell.
killShellr *os.File
killShellw *os.File
// ChannelType holds the type of the channel. For example "session" or
// "direct-tcpip". Used to create correct subcommand during re-exec.
ChannelType string
@ -494,6 +499,14 @@ func NewServerContext(ctx context.Context, parent *sshutils.ConnectionContext, s
child.AddCloser(child.contr)
child.AddCloser(child.contw)
child.killShellr, child.killShellw, err = os.Pipe()
if err != nil {
childErr := child.Close()
return nil, nil, trace.NewAggregate(err, childErr)
}
child.AddCloser(child.killShellr)
child.AddCloser(child.killShellw)
// Create pipe used to get X11 forwarding ready signal from the child process.
child.x11rdyr, child.x11rdyw, err = os.Pipe()
if err != nil {

View file

@ -22,7 +22,7 @@ package srv
import (
"fmt"
"os"
os_exec "os/exec"
"os/exec"
"os/user"
"strconv"
"syscall"
@ -30,27 +30,8 @@ import (
"time"
"github.com/stretchr/testify/require"
"github.com/gravitational/teleport/lib/utils"
)
// TestMain will re-execute Teleport to run a command if "exec" is passed to
// it as an argument. Otherwise it will run tests as normal.
func TestMain(m *testing.M) {
utils.InitLoggerForTests()
// If the test is re-executing itself, execute the command that comes over
// the pipe.
if IsReexec() {
RunAndExit(os.Args[1])
return
}
// Otherwise run tests as normal.
code := m.Run()
os.Exit(code)
}
func TestOSCommandPrep(t *testing.T) {
srv := newMockServer(t)
scx := newExecServerContext(t, srv)
@ -139,7 +120,7 @@ func TestContinue(t *testing.T) {
// Configure Session Context to re-exec "ls".
var err error
lsPath, err := os_exec.LookPath("ls")
lsPath, err := exec.LookPath("ls")
require.NoError(t, err)
scx.execRequest.SetCommand(lsPath)
@ -164,7 +145,7 @@ func TestContinue(t *testing.T) {
case <-time.After(5 * time.Second):
}
// Close the continue pipe to signal to Teleport to now execute the
// Close the continue and terminate pipe to signal to Teleport to now execute the
// requested program.
err = scx.contw.Close()
require.NoError(t, err)

View file

@ -30,8 +30,26 @@ import (
apievents "github.com/gravitational/teleport/api/types/events"
"github.com/gravitational/teleport/lib/events"
"github.com/gravitational/teleport/lib/sshutils"
"github.com/gravitational/teleport/lib/utils"
)
// TestMain will re-execute Teleport to run a command if "exec" is passed to
// it as an argument. Otherwise, it will run tests as normal.
func TestMain(m *testing.M) {
utils.InitLoggerForTests()
// If the test is re-executing itself, execute the command that comes over
// the pipe.
if IsReexec() {
RunAndExit(os.Args[1])
return
}
// Otherwise run tests as normal.
code := m.Run()
os.Exit(code)
}
// TestEmitExecAuditEvent make sure the full command and exit code for a
// command is always recorded.
func TestEmitExecAuditEvent(t *testing.T) {

View file

@ -94,6 +94,9 @@ func newTestServerContext(t *testing.T, srv Server, roleSet services.RoleSet) *S
scx.contr, scx.contw, err = os.Pipe()
require.NoError(t, err)
scx.killShellr, scx.killShellw, err = os.Pipe()
require.NoError(t, err)
t.Cleanup(func() { require.NoError(t, scx.Close()) })
return scx

View file

@ -58,6 +58,12 @@ const (
// it can continue after the parent process assigns a cgroup to the
// child process.
ContinueFile
// TerminateFile is used to communicate to the child process that
// the interactive terminal should be killed as the client ended the
// SSH session and without termination the terminal process will be assigned
// to pid 1 and "live forever". Killing the shell should not prevent processes
// preventing SIGHUP to be reassigned (ex. processes running with nohup).
TerminateFile
// X11File is used to communicate to the parent process that the child
// process has set up X11 forwarding.
X11File
@ -68,7 +74,7 @@ const (
// FirstExtraFile is the first file descriptor that will be valid when
// extra files are passed to child processes without a terminal.
FirstExtraFile FileFD = X11File + 1
FirstExtraFile = X11File + 1
)
// ExecCommand contains the payload to "teleport exec" which will be used to
@ -181,14 +187,18 @@ func RunCommand() (errw io.Writer, code int, err error) {
errorWriter := os.Stdout
// Parent sends the command payload in the third file descriptor.
cmdfd := os.NewFile(CommandFile, "/proc/self/fd/3")
cmdfd := os.NewFile(CommandFile, fmt.Sprintf("/proc/self/fd/%d", CommandFile))
if cmdfd == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("command pipe not found")
}
contfd := os.NewFile(ContinueFile, "/proc/self/fd/4")
contfd := os.NewFile(ContinueFile, fmt.Sprintf("/proc/self/fd/%d", ContinueFile))
if contfd == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("continue pipe not found")
}
termiantefd := os.NewFile(TerminateFile, fmt.Sprintf("/proc/self/fd/%d", TerminateFile))
if termiantefd == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("terminate pipe not found")
}
// Read in the command payload.
var b bytes.Buffer
@ -237,8 +247,8 @@ func RunCommand() (errw io.Writer, code int, err error) {
// PTY and TTY. Extract them and set the controlling TTY. Otherwise, connect
// std{in,out,err} directly.
if c.Terminal {
pty = os.NewFile(PTYFile, "/proc/self/fd/6")
tty = os.NewFile(TTYFile, "/proc/self/fd/7")
pty = os.NewFile(PTYFile, fmt.Sprintf("/proc/self/fd/%d", PTYFile))
tty = os.NewFile(TTYFile, fmt.Sprintf("/proc/self/fd/%d", TTYFile))
if pty == nil || tty == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("pty and tty not found")
}
@ -371,7 +381,7 @@ func RunCommand() (errw io.Writer, code int, err error) {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", x11.DisplayEnv, c.X11Config.XAuthEntry.Display.String()))
// Open x11rdy fd to signal parent process once X11 forwarding is set up.
x11rdyfd := os.NewFile(X11File, "/proc/self/fd/5")
x11rdyfd := os.NewFile(X11File, fmt.Sprintf("/proc/self/fd/%d", X11File))
if x11rdyfd == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("continue pipe not found")
}
@ -394,19 +404,13 @@ func RunCommand() (errw io.Writer, code int, err error) {
}
// Start the command.
err = cmd.Start()
if err != nil {
if err := cmd.Start(); err != nil {
return errorWriter, teleport.RemoteCommandFailure, trace.Wrap(err)
}
parkerCancel()
// Wait for the command to exit. It doesn't make sense to print an error
// message here because the shell has successfully started. If an error
// occurred during shell execution or the shell exits with an error (like
// running exit 2), the shell will print an error if appropriate and return
// an exit code.
err = cmd.Wait()
err = waitForShell(termiantefd, cmd)
if uaccEnabled {
uaccErr := uacc.Close(c.UaccMetadata.UtmpPath, c.UaccMetadata.WtmpPath, tty)
@ -418,6 +422,42 @@ func RunCommand() (errw io.Writer, code int, err error) {
return io.Discard, exitCode(err), trace.Wrap(err)
}
// waitForShell waits either for the command to return or the kill signal from the parent Teleport process.
func waitForShell(termiantefd *os.File, cmd *exec.Cmd) error {
terminateChan := make(chan error)
go func() {
buf := make([]byte, 1)
// Wait for the terminate file descriptor to be closed. The FD will be closed when Teleport
// parent process wants to terminate the remote command and all childs.
_, err := termiantefd.Read(buf)
if err == io.EOF {
// Kill the shell process
err = trace.Errorf("shell process has been killed: %w", cmd.Process.Kill())
} else {
err = trace.Errorf("failed to read from terminate file: %w", err)
}
terminateChan <- err
}()
go func() {
// Wait for the command to exit. It doesn't make sense to print an error
// message here because the shell has successfully started. If an error
// occurred during shell execution or the shell exits with an error (like
// running exit 2), the shell will print an error if appropriate and return
// an exit code.
err := cmd.Wait()
terminateChan <- err
}()
// Wait only for the first error.
// If the command returns then we don't need to wait for the error from cmd.Process.Kill().
// If the command is being killed, then we don't care about the error code.
err := <-terminateChan
return err
}
// osWrapper wraps system calls, so we can replace them in tests.
type osWrapper struct {
LookupGroup func(name string) (*user.Group, error)
@ -518,7 +558,7 @@ func RunForward() (errw io.Writer, code int, err error) {
errorWriter := os.Stderr
// Parent sends the command payload in the third file descriptor.
cmdfd := os.NewFile(CommandFile, "/proc/self/fd/3")
cmdfd := os.NewFile(CommandFile, fmt.Sprintf("/proc/self/fd/%d", CommandFile))
if cmdfd == nil {
return errorWriter, teleport.RemoteCommandFailure, trace.BadParameter("command pipe not found")
}
@ -859,6 +899,7 @@ func ConfigureCommand(ctx *ServerContext, extraFiles ...*os.File) (*exec.Cmd, er
ExtraFiles: []*os.File{
ctx.cmdr,
ctx.contr,
ctx.killShellr,
ctx.x11rdyw,
},
}

View file

@ -630,18 +630,11 @@ func (s *session) Stop() {
s.BroadcastMessage("Stopping session...")
s.log.Info("Stopping session")
// close io copy loops
// Close io copy loops
s.io.Close()
// Close and kill terminal
if s.term != nil {
if err := s.term.Close(); err != nil {
s.log.WithError(err).Debug("Failed to close the shell")
}
if err := s.term.Kill(context.TODO()); err != nil {
s.log.WithError(err).Debug("Failed to kill the shell")
}
}
// Make sure that the terminal has been closed
s.haltTerminal()
// Close session tracker and mark it as terminated
if err := s.tracker.Close(s.serverCtx); err != nil {
@ -649,6 +642,33 @@ func (s *session) Stop() {
}
}
// haltTerminal closes the terminal. Then is tried to terminate the terminal in a graceful way
// and kill by sending SIGKILL if the graceful termination fails.
func (s *session) haltTerminal() {
if s.term == nil {
return
}
if err := s.term.Close(); err != nil {
s.log.WithError(err).Debug("Failed to close the shell")
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
if err := s.term.KillUnderlyingShell(ctx); err != nil {
s.log.WithError(err).Debug("Failed to terminate the shell")
} else {
// Return before we send SIGKILL to the child process, as doing that
// could interrupt the "graceful shutdown" process.
return
}
if err := s.term.Kill(context.TODO()); err != nil {
s.log.WithError(err).Debug("Failed to kill the shell")
}
}
// Close ends the active session and frees all resources. This should only be called
// by the creator of the session, other closers should use Stop instead. Calling this
// prematurely can result in missing audit events, session recordings, and other

View file

@ -18,6 +18,7 @@ package srv
import (
"context"
"errors"
"io"
"os"
"os/exec"
@ -25,6 +26,7 @@ import (
"strconv"
"sync"
"syscall"
"time"
"github.com/creack/pty"
"github.com/gravitational/trace"
@ -62,6 +64,9 @@ type Terminal interface {
// pre-processing routine (placed in a cgroup).
Continue()
// KillUnderlyingShell tries to gracefully stop the terminal process.
KillUnderlyingShell(ctx context.Context) error
// Kill will force kill the terminal.
Kill(ctx context.Context) error
@ -121,12 +126,16 @@ type terminal struct {
log *log.Entry
cmd *exec.Cmd
ctx *ServerContext
cmd *exec.Cmd
serverContext *ServerContext
pty *os.File
tty *os.File
// terminateFD when closed informs the terminal that
// the process running in the shell should be killed.
terminateFD *os.File
pid int
termType string
@ -141,7 +150,8 @@ func newLocalTerminal(ctx *ServerContext) (*terminal, error) {
log: log.WithFields(log.Fields{
trace.Component: teleport.ComponentLocalTerm,
}),
ctx: ctx,
serverContext: ctx,
terminateFD: ctx.killShellw,
}
// Open PTY and corresponding TTY.
@ -168,10 +178,16 @@ func (t *terminal) AddParty(delta int) {
}
// Run will run the terminal.
func (t *terminal) Run(_ context.Context) error {
func (t *terminal) Run(ctx context.Context) error {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
var err error
// Create the command that will actually execute.
t.cmd, err = ConfigureCommand(t.ctx)
t.cmd, err = ConfigureCommand(t.serverContext)
if err != nil {
return trace.Wrap(err)
}
@ -222,13 +238,44 @@ func (t *terminal) Wait() (*ExecResult, error) {
// Continue will resume execution of the process after it completes its
// pre-processing routine (placed in a cgroup).
func (t *terminal) Continue() {
if err := t.ctx.contw.Close(); err != nil {
if err := t.serverContext.contw.Close(); err != nil {
t.log.Warnf("failed to close server context")
}
}
// Kill will force kill the terminal.
func (t *terminal) Kill(ctx context.Context) error {
// KillUnderlyingShell tries to kill the shell/bash process and waits for the process PID to be released.
func (t *terminal) KillUnderlyingShell(ctx context.Context) error {
if err := t.terminateFD.Close(); err != nil {
if !errors.Is(err, os.ErrClosed) {
t.log.WithError(err).Debug("Failed to close the shell file descriptor")
}
}
pid := t.PID()
for {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
proc, err := os.FindProcess(pid)
if err != nil {
return trace.Errorf("failed to find the shell process: %w", err)
}
if err := proc.Signal(syscall.Signal(0)); errors.Is(err, os.ErrProcessDone) {
t.log.Debugf("Terminal child process has been stopped")
return nil
}
time.Sleep(10 * time.Millisecond)
}
}
// Kill will force kill the child Teleport process.
func (t *terminal) Kill(_ context.Context) error {
if t.cmd != nil && t.cmd.Process != nil {
if err := t.cmd.Process.Kill(); err != nil {
if err.Error() != "os: process already finished" {
@ -400,7 +447,7 @@ func getOwner(login string, lookupUser LookupUser, lookupGroup LookupGroup) (int
// setOwner changes the owner and mode of the TTY.
func (t *terminal) setOwner() error {
uid, gid, mode, err := getOwner(t.ctx.Identity.Login, user.Lookup, user.LookupGroup)
uid, gid, mode, err := getOwner(t.serverContext.Identity.Login, user.Lookup, user.LookupGroup)
if err != nil {
return trace.Wrap(err)
}
@ -547,6 +594,11 @@ func (t *remoteTerminal) Wait() (*ExecResult, error) {
// Continue does nothing for remote command execution.
func (t *remoteTerminal) Continue() {}
// Terminate does nothing for remote command execution.
func (t *remoteTerminal) KillUnderlyingShell(_ context.Context) error {
return nil
}
func (t *remoteTerminal) Kill(ctx context.Context) error {
err := t.session.Signal(ctx, ssh.SIGKILL)
if err != nil {

View file

@ -17,9 +17,12 @@ limitations under the License.
package srv
import (
"context"
"os"
"os/exec"
"os/user"
"testing"
"time"
"github.com/gravitational/trace"
"github.com/stretchr/testify/require"
@ -78,3 +81,53 @@ func TestGetOwner(t *testing.T) {
require.Equal(t, tt.outMode, mode)
}
}
func TestTerminal_KillUnderlyingShell(t *testing.T) {
t.Parallel()
srv := newMockServer(t)
scx := newTestServerContext(t, srv, nil)
lsPath, err := exec.LookPath("sh")
require.NoError(t, err)
scx.execRequest.SetCommand(lsPath)
term, err := newLocalTerminal(scx)
require.NoError(t, err)
term.SetTermType("xterm")
ctx := context.Background()
// Run sh
err = term.Run(ctx)
require.NoError(t, err)
errors := make(chan error)
go func() {
// Call wait to avoid creating zombie process.
// Ignore exit code as we're checking term.cmd.ProcessState already
_, err := term.Wait()
errors <- err
}()
// Continue execution
err = scx.contw.Close()
require.NoError(t, err)
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
t.Cleanup(cancel)
err = term.KillUnderlyingShell(ctx)
require.NoError(t, err)
// Wait for the process to return.
require.NoError(t, <-errors)
// ProcessState should be not nil after the process exits.
require.NotNil(t, term.cmd.ProcessState)
require.NotZero(t, term.cmd.ProcessState.Pid())
// 255 is returned on subprocess kill.
require.Equal(t, 255, term.cmd.ProcessState.ExitCode())
}