net: handle spurious netpoll wakeups in connect

In some cases the netpoll code can cause a spurious wakeup. This is
normally harmless, as the woken up code simply retries the operation.
However, for connect, the test we were using to see whether the
connect had succeeded (setsockopt(SO_ERROR)) was not reliable in the
case of a spurious wakeup.  Change to using a reliable test (getpeername).
On Darwin we used a different technique: a second call to connect;
change Darwin to use getpeername as well.

Return the result of getpeername to avoid having to call it twice.

Fixes #19289.

Change-Id: I119ec8e7a41f482f1e590d4c65a37f6103fa22d9
Reviewed-on: https://go-review.googlesource.com/45815
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Ian Lance Taylor 2017-06-14 19:55:40 -07:00
parent ebcdda4732
commit bf0f692202
4 changed files with 77 additions and 32 deletions

View file

@ -63,7 +63,7 @@ func (fd *netFD) name() string {
return fd.net + ":" + ls + "->" + rs
}
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret error) {
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (rsa syscall.Sockaddr, ret error) {
// Do not need to call fd.writeLock here,
// because fd is not yet accessible to user,
// so no concurrent operations are possible.
@ -72,14 +72,14 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro
case nil, syscall.EISCONN:
select {
case <-ctx.Done():
return mapErr(ctx.Err())
return nil, mapErr(ctx.Err())
default:
}
if err := fd.pfd.Init(fd.net, true); err != nil {
return err
return nil, err
}
runtime.KeepAlive(fd)
return nil
return nil, nil
case syscall.EINVAL:
// On Solaris we can see EINVAL if the socket has
// already been accepted and closed by the server.
@ -87,14 +87,14 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro
// the socket will see EOF. For details and a test
// case in C see https://golang.org/issue/6828.
if runtime.GOOS == "solaris" {
return nil
return nil, nil
}
fallthrough
default:
return os.NewSyscallError("connect", err)
return nil, os.NewSyscallError("connect", err)
}
if err := fd.pfd.Init(fd.net, true); err != nil {
return err
return nil, err
}
if deadline, _ := ctx.Deadline(); !deadline.IsZero() {
fd.pfd.SetWriteDeadline(deadline)
@ -152,30 +152,28 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (ret erro
if err := fd.pfd.WaitWrite(); err != nil {
select {
case <-ctx.Done():
return mapErr(ctx.Err())
return nil, mapErr(ctx.Err())
default:
}
return err
return nil, err
}
nerr, err := getsockoptIntFunc(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_ERROR)
if err != nil {
return os.NewSyscallError("getsockopt", err)
return nil, os.NewSyscallError("getsockopt", err)
}
switch err := syscall.Errno(nerr); err {
case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
case syscall.Errno(0), syscall.EISCONN:
if runtime.GOOS != "darwin" {
return nil
}
// See golang.org/issue/14548.
// On Darwin, multiple connect system calls on
// a non-blocking socket never harm SO_ERROR.
switch err := connectFunc(fd.pfd.Sysfd, ra); err {
case nil, syscall.EISCONN:
return nil
case syscall.EISCONN:
return nil, nil
case syscall.Errno(0):
// The runtime poller can wake us up spuriously;
// see issues 14548 and 19289. Check that we are
// really connected; if not, wait again.
if rsa, err := syscall.Getpeername(fd.pfd.Sysfd); err == nil {
return rsa, nil
}
default:
return os.NewSyscallError("getsockopt", err)
return nil, os.NewSyscallError("getsockopt", err)
}
runtime.KeepAlive(fd)
}

View file

@ -65,12 +65,13 @@ func (fd *netFD) setAddr(laddr, raddr Addr) {
runtime.SetFinalizer(fd, (*netFD).Close)
}
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
// Always returns nil for connected peer address result.
func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (syscall.Sockaddr, error) {
// Do not need to call fd.writeLock here,
// because fd is not yet accessible to user,
// so no concurrent operations are possible.
if err := fd.init(); err != nil {
return err
return nil, err
}
if deadline, ok := ctx.Deadline(); ok && !deadline.IsZero() {
fd.pfd.SetWriteDeadline(deadline)
@ -78,7 +79,7 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
}
if !canUseConnectEx(fd.net) {
err := connectFunc(fd.pfd.Sysfd, ra)
return os.NewSyscallError("connect", err)
return nil, os.NewSyscallError("connect", err)
}
// ConnectEx windows API requires an unconnected, previously bound socket.
if la == nil {
@ -91,7 +92,7 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
panic("unexpected type in connect")
}
if err := syscall.Bind(fd.pfd.Sysfd, la); err != nil {
return os.NewSyscallError("bind", err)
return nil, os.NewSyscallError("bind", err)
}
}
@ -115,16 +116,16 @@ func (fd *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) error {
if err := fd.pfd.ConnectEx(ra); err != nil {
select {
case <-ctx.Done():
return mapErr(ctx.Err())
return nil, mapErr(ctx.Err())
default:
if _, ok := err.(syscall.Errno); ok {
err = os.NewSyscallError("connectex", err)
}
return err
return nil, err
}
}
// Refresh socket properties.
return os.NewSyscallError("setsockopt", syscall.Setsockopt(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_UPDATE_CONNECT_CONTEXT, (*byte)(unsafe.Pointer(&fd.pfd.Sysfd)), int32(unsafe.Sizeof(fd.pfd.Sysfd))))
return nil, os.NewSyscallError("setsockopt", syscall.Setsockopt(fd.pfd.Sysfd, syscall.SOL_SOCKET, syscall.SO_UPDATE_CONNECT_CONTEXT, (*byte)(unsafe.Pointer(&fd.pfd.Sysfd)), int32(unsafe.Sizeof(fd.pfd.Sysfd))))
}
func (fd *netFD) Close() error {

View file

@ -133,12 +133,13 @@ func (fd *netFD) dial(ctx context.Context, laddr, raddr sockaddr) error {
}
}
}
var rsa syscall.Sockaddr
var rsa syscall.Sockaddr // remote address from the user
var crsa syscall.Sockaddr // remote address we actually connected to
if raddr != nil {
if rsa, err = raddr.sockaddr(fd.family); err != nil {
return err
}
if err := fd.connect(ctx, lsa, rsa); err != nil {
if crsa, err = fd.connect(ctx, lsa, rsa); err != nil {
return err
}
fd.isConnected = true
@ -147,8 +148,16 @@ func (fd *netFD) dial(ctx context.Context, laddr, raddr sockaddr) error {
return err
}
}
// Record the local and remote addresses from the actual socket.
// Get the local address by calling Getsockname.
// For the remote address, use
// 1) the one returned by the connect method, if any; or
// 2) the one from Getpeername, if it succeeds; or
// 3) the one passed to us as the raddr parameter.
lsa, _ = syscall.Getsockname(fd.pfd.Sysfd)
if rsa, _ = syscall.Getpeername(fd.pfd.Sysfd); rsa != nil {
if crsa != nil {
fd.setAddr(fd.addrFunc()(lsa), fd.addrFunc()(crsa))
} else if rsa, _ = syscall.Getpeername(fd.pfd.Sysfd); rsa != nil {
fd.setAddr(fd.addrFunc()(lsa), fd.addrFunc()(rsa))
} else {
fd.setAddr(fd.addrFunc()(lsa), raddr)

View file

@ -2,11 +2,14 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin
// +build !plan9,!windows
package net
import (
"context"
"internal/testenv"
"math/rand"
"runtime"
"sync"
"syscall"
@ -77,3 +80,37 @@ func TestTCPSpuriousConnSetupCompletion(t *testing.T) {
ln.Close()
wg.Wait()
}
// Issue 19289.
// Test that a canceled Dial does not cause a subsequent Dial to succeed.
func TestTCPSpuriousConnSetupCompletionWithCancel(t *testing.T) {
if testenv.Builder() == "" {
testenv.MustHaveExternalNetwork(t)
}
t.Parallel()
const tries = 10000
var wg sync.WaitGroup
wg.Add(tries * 2)
sem := make(chan bool, 5)
for i := 0; i < tries; i++ {
sem <- true
ctx, cancel := context.WithCancel(context.Background())
go func() {
defer wg.Done()
time.Sleep(time.Duration(rand.Int63n(int64(5 * time.Millisecond))))
cancel()
}()
go func(i int) {
defer wg.Done()
var dialer Dialer
// Try to connect to a real host on a port
// that it is not listening on.
_, err := dialer.DialContext(ctx, "tcp", "golang.org:3")
if err == nil {
t.Errorf("Dial to unbound port succeeded on attempt %d", i)
}
<-sem
}(i)
}
wg.Wait()
}