runtime: use GetQueuedCompletionStatusEx on windows if available

GetQueuedCompletionStatusEx allows to dequeue a batch of completion
notifications, which is more efficient than dequeueing one by one.

benchmark                           old ns/op    new ns/op    delta
BenchmarkClientServerParallel4         100605        90945   -9.60%
BenchmarkClientServerParallel4-2        90225        74504  -17.42%

R=golang-dev, alex.brainman
CC=golang-dev
https://golang.org/cl/12436044
This commit is contained in:
Dmitriy Vyukov 2013-08-08 17:41:57 +04:00
parent ed8c5501c7
commit 65834685d3
5 changed files with 80 additions and 33 deletions

View file

@ -206,6 +206,12 @@ func runtime_pollUnblock(pd *PollDesc) {
runtime·ready(wg);
}
uintptr
runtime·netpollfd(PollDesc *pd)
{
return pd->fd;
}
// make pd ready, newly runnable goroutines (if any) are enqueued info gpp list
void
runtime·netpollready(G **gpp, PollDesc *pd, int32 mode)

View file

@ -10,9 +10,11 @@
#pragma dynimport runtime·CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
#pragma dynimport runtime·GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
#pragma dynimport runtime·WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
extern void *runtime·CreateIoCompletionPort;
extern void *runtime·GetQueuedCompletionStatus;
extern void *runtime·WSAGetOverlappedResult;
#define INVALID_HANDLE_VALUE ((uintptr)-1)
@ -23,12 +25,23 @@ struct net_op
// used by windows
Overlapped o;
// used by netpoll
uintptr runtimeCtx;
PollDesc* pd;
int32 mode;
int32 errno;
uint32 qty;
};
typedef struct OverlappedEntry OverlappedEntry;
struct OverlappedEntry
{
uintptr key;
net_op* op; // In reality it's Overlapped*, but we cast it to net_op* anyway.
uintptr internal;
uint32 qty;
};
static void handlecompletion(G **gpp, net_op *o, int32 errno, uint32 qty);
static uintptr iocphandle = INVALID_HANDLE_VALUE; // completion port io handle
void
@ -64,49 +77,72 @@ runtime·netpollclose(uintptr fd)
G*
runtime·netpoll(bool block)
{
uint32 wait, qty, key;
int32 mode, errno;
net_op *o;
OverlappedEntry entries[64];
uint32 wait, qty, key, flags, n, i;
int32 errno;
net_op *op;
G *gp;
if(iocphandle == INVALID_HANDLE_VALUE)
return nil;
gp = nil;
wait = 0;
if(block)
wait = INFINITE;
retry:
o = nil;
errno = 0;
qty = 0;
wait = INFINITE;
if(!block)
wait = 0;
// TODO(brainman): Need a loop here to fetch all pending notifications
// (or at least a batch). Scheduler will behave better if is given
// a batch of newly runnable goroutines.
// TODO(brainman): Call GetQueuedCompletionStatusEx() here when possible.
if(runtime·stdcall(runtime·GetQueuedCompletionStatus, 5, iocphandle, &qty, &key, &o, (uintptr)wait) == 0) {
errno = runtime·getlasterror();
if(o == nil && errno == WAIT_TIMEOUT) {
if(!block)
if(runtime·GetQueuedCompletionStatusEx != nil) {
n = nelem(entries) / runtime·gomaxprocs;
if(n < 8)
n = 8;
if(runtime·stdcall(runtime·GetQueuedCompletionStatusEx, 6, iocphandle, entries, (uintptr)n, &n, (uintptr)wait, (uintptr)0) == 0) {
errno = runtime·getlasterror();
if(!block && errno == WAIT_TIMEOUT)
return nil;
runtime·throw("netpoll: GetQueuedCompletionStatus timed out");
runtime·printf("netpoll: GetQueuedCompletionStatusEx failed (errno=%d)\n", errno);
runtime·throw("netpoll: GetQueuedCompletionStatusEx failed");
}
if(o == nil) {
runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
runtime·throw("netpoll: GetQueuedCompletionStatus failed");
for(i = 0; i < n; i++) {
op = entries[i].op;
errno = 0;
qty = 0;
if(runtime·stdcall(runtime·WSAGetOverlappedResult, 5, runtime·netpollfd(op->pd), op, &qty, (uintptr)0, (uintptr)&flags) == 0)
errno = runtime·getlasterror();
handlecompletion(&gp, op, errno, qty);
}
// dequeued failed IO packet, so report that
} else {
op = nil;
errno = 0;
qty = 0;
if(runtime·stdcall(runtime·GetQueuedCompletionStatus, 5, iocphandle, &qty, &key, &op, (uintptr)wait) == 0) {
errno = runtime·getlasterror();
if(!block && errno == WAIT_TIMEOUT)
return nil;
if(op == nil) {
runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
runtime·throw("netpoll: GetQueuedCompletionStatus failed");
}
// dequeued failed IO packet, so report that
}
handlecompletion(&gp, op, errno, qty);
}
if(o == nil)
runtime·throw("netpoll: GetQueuedCompletionStatus returned o == nil");
mode = o->mode;
if(mode != 'r' && mode != 'w') {
runtime·printf("netpoll: GetQueuedCompletionStatus returned invalid mode=%d\n", mode);
runtime·throw("netpoll: GetQueuedCompletionStatus returned invalid mode");
}
o->errno = errno;
o->qty = qty;
runtime·netpollready(&gp, (void*)o->runtimeCtx, mode);
if(block && gp == nil)
goto retry;
return gp;
}
static void
handlecompletion(G **gpp, net_op *op, int32 errno, uint32 qty)
{
int32 mode;
if(op == nil)
runtime·throw("netpoll: GetQueuedCompletionStatus returned op == nil");
mode = op->mode;
if(mode != 'r' && mode != 'w') {
runtime·printf("netpoll: GetQueuedCompletionStatus returned invalid mode=%d\n", mode);
runtime·throw("netpoll: GetQueuedCompletionStatus returned invalid mode");
}
op->errno = errno;
op->qty = qty;
runtime·netpollready(gpp, op->pd, mode);
}

View file

@ -68,6 +68,8 @@ extern void *runtime·timeBeginPeriod;
extern void *runtime·WaitForSingleObject;
extern void *runtime·WriteFile;
void *runtime·GetQueuedCompletionStatusEx;
static int32
getproccount(void)
{
@ -100,6 +102,7 @@ runtime·osinit(void)
SetProcessPriorityBoost = runtime·stdcall(runtime·GetProcAddress, 2, kernel32, "SetProcessPriorityBoost");
if(SetProcessPriorityBoost != nil) // supported since Windows XP
runtime·stdcall(SetProcessPriorityBoost, 2, (uintptr)-1, (uintptr)1);
runtime·GetQueuedCompletionStatusEx = runtime·stdcall(runtime·GetProcAddress, 2, kernel32, "GetQueuedCompletionStatusEx");
}
}

View file

@ -4,6 +4,7 @@
extern void *runtime·LoadLibrary;
extern void *runtime·GetProcAddress;
extern void *runtime·GetQueuedCompletionStatusEx;
// Call a Windows function with stdcall conventions,
// and switch to os stack during the call.

View file

@ -853,6 +853,7 @@ void runtime·netpollinit(void);
int32 runtime·netpollopen(uintptr, PollDesc*);
int32 runtime·netpollclose(uintptr);
void runtime·netpollready(G**, PollDesc*, int32);
uintptr runtime·netpollfd(PollDesc*);
void runtime·crash(void);
void runtime·parsedebugvars(void);
void _rt0_go(void);