runtime: implement time.now in assembly for linux-amd64

name                                                       old time/op      new time/op      delta
AfterFunc-12                                                   66.7µs ± 3%      66.8µs ± 4%     ~     (p=0.836 n=20+20)
After-12                                                       99.4µs ± 4%      98.1µs ± 3%   -1.31%  (p=0.013 n=20+20)
Stop-12                                                        66.1µs ±12%      65.4µs ±10%     ~     (p=0.602 n=20+20)
SimultaneousAfterFunc-12                                        110µs ± 1%       114µs ± 2%   +3.98%  (p=0.000 n=19+18)
StartStop-12                                                   32.1µs ±15%      32.2µs ±13%     ~     (p=0.620 n=20+20)
Reset-12                                                       3.66µs ± 2%      3.63µs ± 5%   -0.92%  (p=0.018 n=20+17)
Sleep-12                                                        134µs ± 1%       139µs ± 4%   +3.97%  (p=0.000 n=20+18)
Ticker-12                                                      32.8µs ± 1%      32.6µs ± 2%   -0.63%  (p=0.017 n=18+20)
TickerReset-12                                                 3.72µs ± 3%      3.71µs ± 3%     ~     (p=0.753 n=20+20)
TickerResetNaive-12                                            68.9µs ±11%      65.8µs ± 8%   -4.44%  (p=0.008 n=20+20)
Now-12                                                         33.3ns ± 1%      29.6ns ± 0%  -11.06%  (p=0.000 n=18+16)
NowUnixNano-12                                                 34.6ns ± 0%      31.2ns ± 0%   -9.94%  (p=0.000 n=20+17)
NowUnixMilli-12                                                35.0ns ± 1%      30.9ns ± 0%  -11.75%  (p=0.000 n=19+15)
NowUnixMicro-12                                                35.0ns ± 0%      30.9ns ± 0%  -11.85%  (p=0.000 n=20+19)
Format-12                                                       302ns ± 3%       306ns ± 3%   +1.22%  (p=0.009 n=20+20)
FormatNow-12                                                    184ns ± 5%       187ns ± 2%   +1.25%  (p=0.046 n=20+19)
MarshalJSON-12                                                  262ns ± 2%       270ns ± 3%   +2.99%  (p=0.000 n=20+20)
MarshalText-12                                                  262ns ± 3%       268ns ± 3%   +2.37%  (p=0.000 n=19+19)
Parse-12                                                        145ns ± 1%       148ns ± 0%   +2.27%  (p=0.000 n=18+19)
ParseDuration-12                                               82.3ns ± 1%      79.7ns ± 1%   -3.06%  (p=0.000 n=20+20)
Hour-12                                                        4.48ns ± 1%      4.42ns ± 1%   -1.32%  (p=0.000 n=19+19)
Second-12                                                      4.44ns ± 1%      4.42ns ± 1%   -0.39%  (p=0.000 n=20+18)
Year-12                                                        11.2ns ± 1%      11.1ns ± 1%     ~     (p=0.193 n=20+20)
Day-12                                                         14.8ns ± 0%      14.8ns ± 1%     ~     (p=0.873 n=19+20)
ISOWeek-12                                                     17.2ns ± 0%      17.2ns ± 0%     ~     (p=0.605 n=18+20)

name                                                       old avg-late-ns  new avg-late-ns  delta
ParallelTimerLatency-12                                          375k ± 3%        377k ± 3%     ~     (p=0.445 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         136k ± 2%        137k ± 2%     ~     (p=0.242 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12        97.4k ±11%       96.4k ±10%     ~     (p=0.336 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12        74.8k ± 3%       74.2k ± 3%     ~     (p=0.158 n=20+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12        70.7k ± 7%       70.4k ± 6%     ~     (p=0.879 n=20+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12        65.8k ± 9%       66.3k ±14%     ~     (p=0.594 n=17+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12        55.5k ±29%       56.7k ±30%     ~     (p=0.758 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        45.3k ±29%       43.6k ±33%     ~     (p=0.212 n=19+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12        64.7k ±46%       65.2k ±78%     ~     (p=0.480 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12         147k ±88%        119k ±83%     ~     (p=0.092 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12       2.63M ±29%       2.70M ±59%     ~     (p=0.989 n=19+20)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12          81.4k ± 4%       80.2k ± 3%   -1.55%  (p=0.009 n=17+18)

name                                                       old max-late-ns  new max-late-ns  delta
ParallelTimerLatency-12                                        7.66M ±102%      6.98M ±131%     ~     (p=0.445 n=20+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=1-12         381k ±12%        382k ±17%     ~     (p=0.901 n=17+16)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=2-12         388k ±69%        356k ±10%     ~     (p=0.363 n=17+16)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=3-12         350k ±17%        347k ±25%     ~     (p=0.538 n=19+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=4-12         378k ±52%        341k ±30%     ~     (p=0.153 n=18+17)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=5-12         392k ±54%        410k ±78%     ~     (p=0.730 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=6-12         467k ±80%       527k ±129%     ~     (p=0.616 n=17+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=7-12        915k ±138%      1023k ±227%     ~     (p=0.696 n=20+18)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=8-12       1.84M ±155%      1.74M ±158%     ~     (p=0.893 n=18+19)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=9-12        6.95M ±92%       7.66M ±91%     ~     (p=0.687 n=19+20)
StaggeredTickerLatency/work-dur=300µs/tickers-per-P=10-12       18.6M ±22%       16.2M ±28%  -12.78%  (p=0.003 n=19+19)
StaggeredTickerLatency/work-dur=2ms/tickers-per-P=1-12          1.01M ± 8%       1.04M ±10%     ~     (p=0.111 n=19+18)

Change-Id: I96aa2e0206a6e9286bcbfc8be372e84608ed4e2f
Reviewed-on: https://go-review.googlesource.com/c/go/+/314277
Trust: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Ian Lance Taylor 2021-04-27 16:46:53 -07:00
parent 756fd56bbf
commit d09947522d
3 changed files with 104 additions and 4 deletions

View file

@ -0,0 +1,97 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !faketime
// +build !faketime
#include "go_asm.h"
#include "go_tls.h"
#include "textflag.h"
#define SYS_clock_gettime 228
// func time.now() (sec int64, nsec int32, mono int64)
TEXT time·now(SB),NOSPLIT,$16-24
MOVQ SP, R12 // Save old SP; R12 unchanged by C code.
#ifdef GOEXPERIMENT_regabig
MOVQ g_m(R14), BX // BX unchanged by C code.
#else
get_tls(CX)
MOVQ g(CX), AX
MOVQ g_m(AX), BX // BX unchanged by C code.
#endif
// Store CLOCK_REALTIME results directly to return space.
LEAQ sec+0(FP), SI
// Set vdsoPC and vdsoSP for SIGPROF traceback.
// Save the old values on stack and restore them on exit,
// so this function is reentrant.
MOVQ m_vdsoPC(BX), CX
MOVQ m_vdsoSP(BX), DX
MOVQ CX, 0(SP)
MOVQ DX, 8(SP)
MOVQ -8(SI), CX // Sets CX to function return address.
MOVQ CX, m_vdsoPC(BX)
MOVQ SI, m_vdsoSP(BX)
#ifdef GOEXPERIMENT_regabig
CMPQ R14, m_curg(BX) // Only switch if on curg.
#else
CMPQ AX, m_curg(BX) // Only switch if on curg.
#endif
JNE noswitch
MOVQ m_g0(BX), DX
MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
noswitch:
SUBQ $16, SP // Space for monotonic time results
ANDQ $~15, SP // Align for C code
MOVL $0, DI // CLOCK_REALTIME
MOVQ runtime·vdsoClockgettimeSym(SB), AX
CMPQ AX, $0
JEQ fallback
CALL AX
MOVL $1, DI // CLOCK_MONOTONIC
LEAQ 0(SP), SI
MOVQ runtime·vdsoClockgettimeSym(SB), AX
CALL AX
ret:
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
MOVQ R12, SP // Restore real SP
// Restore vdsoPC, vdsoSP
// We don't worry about being signaled between the two stores.
// If we are not in a signal handler, we'll restore vdsoSP to 0,
// and no one will care about vdsoPC. If we are in a signal handler,
// we cannot receive another signal.
MOVQ 8(SP), CX
MOVQ CX, m_vdsoSP(BX)
MOVQ 0(SP), CX
MOVQ CX, m_vdsoPC(BX)
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
ADDQ DX, AX
MOVQ AX, mono+16(FP)
RET
fallback:
MOVQ $SYS_clock_gettime, AX
SYSCALL
MOVL $1, DI // CLOCK_MONOTONIC
LEAQ 0(SP), SI
MOVQ $SYS_clock_gettime, AX
SYSCALL
JMP ret

View file

@ -4,8 +4,9 @@
// Declarations for operating systems implementing time.now directly in assembly.
//go:build !faketime && windows
// +build !faketime,windows
//go:build !faketime && (windows || (linux && amd64))
// +build !faketime
// +build windows linux,amd64
package runtime

View file

@ -5,8 +5,10 @@
// Declarations for operating systems implementing time.now
// indirectly, in terms of walltime and nanotime assembly.
//go:build !faketime && !windows
// +build !faketime,!windows
//go:build !faketime && !windows && !(linux && amd64)
// +build !faketime
// +build !windows
// +build !linux !amd64
package runtime