regexp: avoid copying each instruction executed

Inst is a 40-byte struct, so avoiding the copy gives a decent speedup:

name                            old time/op    new time/op     delta
Find-8                             160ns ± 4%      109ns ± 4%  -32.22%  (p=0.008 n=5+5)
FindAllNoMatches-8                70.4ns ± 4%     53.8ns ± 0%  -23.58%  (p=0.016 n=5+4)
FindString-8                       154ns ± 6%      107ns ± 0%  -30.37%  (p=0.016 n=5+4)
FindSubmatch-8                     194ns ± 1%      135ns ± 1%  -30.56%  (p=0.008 n=5+5)
FindStringSubmatch-8               193ns ± 8%      131ns ± 0%  -31.82%  (p=0.008 n=5+5)
Literal-8                         42.8ns ± 2%     34.8ns ± 0%  -18.67%  (p=0.008 n=5+5)
NotLiteral-8                       917ns ± 2%      636ns ± 0%  -30.68%  (p=0.008 n=5+5)
MatchClass-8                      1.18µs ± 3%     0.91µs ± 1%  -22.27%  (p=0.016 n=5+4)
MatchClass_InRange-8              1.11µs ± 1%     0.87µs ± 2%  -21.38%  (p=0.008 n=5+5)
ReplaceAll-8                       659ns ± 6%      596ns ± 3%   -9.60%  (p=0.008 n=5+5)
AnchoredLiteralShortNonMatch-8    34.2ns ± 0%     30.4ns ± 1%  -11.20%  (p=0.016 n=4+5)
AnchoredLiteralLongNonMatch-8     38.7ns ± 0%     38.7ns ± 0%     ~     (p=0.579 n=5+5)
AnchoredShortMatch-8              67.0ns ± 1%     52.7ns ± 0%  -21.31%  (p=0.016 n=5+4)
AnchoredLongMatch-8                121ns ± 0%      124ns ±10%     ~     (p=0.730 n=5+5)
OnePassShortA-8                    392ns ± 0%      231ns ± 3%  -41.10%  (p=0.008 n=5+5)
NotOnePassShortA-8                 370ns ± 0%      282ns ± 1%  -23.81%  (p=0.008 n=5+5)
OnePassShortB-8                    280ns ± 0%      179ns ± 1%  -36.05%  (p=0.008 n=5+5)
NotOnePassShortB-8                 226ns ± 0%      185ns ± 3%  -18.26%  (p=0.008 n=5+5)
OnePassLongPrefix-8               51.7ns ± 0%     39.1ns ± 1%  -24.28%  (p=0.016 n=4+5)
OnePassLongNotPrefix-8             213ns ± 2%      132ns ± 1%  -37.86%  (p=0.008 n=5+5)
MatchParallelShared-8             25.3ns ± 3%     23.4ns ± 7%   -7.50%  (p=0.016 n=5+5)
MatchParallelCopied-8             26.5ns ± 7%     22.3ns ± 7%  -16.06%  (p=0.008 n=5+5)
QuoteMetaAll-8                    45.8ns ± 1%     45.8ns ± 1%     ~     (p=1.000 n=5+5)
QuoteMetaNone-8                   24.3ns ± 0%     24.3ns ± 0%     ~     (p=0.325 n=5+5)
Compile/Onepass-8                 1.98µs ± 0%     1.97µs ± 0%   -0.22%  (p=0.016 n=5+4)
Compile/Medium-8                  4.56µs ± 0%     4.55µs ± 1%     ~     (p=0.595 n=5+5)
Compile/Hard-8                    35.7µs ± 0%     35.3µs ± 3%     ~     (p=0.151 n=5+5)
Match/Easy0/16-8                  2.18ns ± 2%     2.19ns ± 5%     ~     (p=0.690 n=5+5)
Match/Easy0/32-8                  27.4ns ± 2%     27.6ns ± 4%     ~     (p=1.000 n=5+5)
Match/Easy0/1K-8                   246ns ± 0%      252ns ± 7%     ~     (p=0.238 n=5+5)
Match/Easy0/32K-8                 4.58µs ± 7%     4.64µs ± 5%     ~     (p=1.000 n=5+5)
Match/Easy0/1M-8                   235µs ± 0%      235µs ± 0%     ~     (p=0.886 n=4+4)
Match/Easy0/32M-8                 7.86ms ± 0%     7.86ms ± 1%     ~     (p=0.730 n=4+5)
Match/Easy0i/16-8                 2.15ns ± 0%     2.15ns ± 0%     ~     (p=0.246 n=5+5)
Match/Easy0i/32-8                  507ns ± 2%      466ns ± 4%   -8.03%  (p=0.008 n=5+5)
Match/Easy0i/1K-8                 14.7µs ± 0%     13.6µs ± 2%   -7.63%  (p=0.008 n=5+5)
Match/Easy0i/32K-8                 571µs ± 1%      570µs ± 1%     ~     (p=0.556 n=4+5)
Match/Easy0i/1M-8                 18.2ms ± 0%     18.8ms ±11%     ~     (p=0.548 n=5+5)
Match/Easy0i/32M-8                 581ms ± 0%      590ms ± 1%   +1.52%  (p=0.016 n=4+5)
Match/Easy1/16-8                  2.17ns ± 0%     2.15ns ± 0%   -0.90%  (p=0.000 n=5+4)
Match/Easy1/32-8                  25.1ns ± 0%     25.4ns ± 4%     ~     (p=0.651 n=5+5)
Match/Easy1/1K-8                   462ns ± 1%      431ns ± 4%   -6.56%  (p=0.008 n=5+5)
Match/Easy1/32K-8                 18.8µs ± 0%     18.8µs ± 1%     ~     (p=1.000 n=5+5)
Match/Easy1/1M-8                   658µs ± 0%      658µs ± 1%     ~     (p=0.841 n=5+5)
Match/Easy1/32M-8                 21.0ms ± 1%     21.0ms ± 2%     ~     (p=0.841 n=5+5)
Match/Medium/16-8                 2.15ns ± 0%     2.16ns ± 0%     ~     (p=0.714 n=4+5)
Match/Medium/32-8                  561ns ± 1%      512ns ± 5%   -8.69%  (p=0.008 n=5+5)
Match/Medium/1K-8                 16.9µs ± 0%     15.2µs ± 1%  -10.40%  (p=0.008 n=5+5)
Match/Medium/32K-8                 632µs ± 0%      631µs ± 1%     ~     (p=0.421 n=5+5)
Match/Medium/1M-8                 20.3ms ± 1%     20.1ms ± 0%     ~     (p=0.190 n=5+4)
Match/Medium/32M-8                 650ms ± 1%      646ms ± 0%   -0.58%  (p=0.032 n=5+4)
Match/Hard/16-8                   2.15ns ± 0%     2.15ns ± 1%     ~     (p=0.111 n=5+5)
Match/Hard/32-8                    870ns ± 2%      667ns ± 1%  -23.28%  (p=0.008 n=5+5)
Match/Hard/1K-8                   26.9µs ± 0%     21.0µs ± 2%  -21.83%  (p=0.008 n=5+5)
Match/Hard/32K-8                   833µs ± 0%      833µs ± 1%     ~     (p=0.548 n=5+5)
Match/Hard/1M-8                   26.6ms ± 0%     26.8ms ± 1%     ~     (p=0.905 n=4+5)
Match/Hard/32M-8                   856ms ± 0%      851ms ± 0%   -0.65%  (p=0.016 n=5+4)
Match/Hard1/16-8                  2.96µs ±12%     1.81µs ± 3%  -38.68%  (p=0.008 n=5+5)
Match/Hard1/32-8                  5.62µs ± 3%     3.48µs ± 0%  -38.07%  (p=0.016 n=5+4)
Match/Hard1/1K-8                   175µs ± 5%      108µs ± 0%  -37.85%  (p=0.016 n=5+4)
Match/Hard1/32K-8                 4.09ms ± 2%     4.05ms ± 0%   -0.85%  (p=0.016 n=5+4)
Match/Hard1/1M-8                   131ms ± 0%      131ms ± 3%     ~     (p=0.151 n=5+5)
Match/Hard1/32M-8                  4.19s ± 0%      4.20s ± 1%     ~     (p=1.000 n=5+5)
Match_onepass_regex/16-8           262ns ± 2%      170ns ± 2%  -35.13%  (p=0.008 n=5+5)
Match_onepass_regex/32-8           463ns ± 0%      306ns ± 0%  -33.90%  (p=0.008 n=5+5)
Match_onepass_regex/1K-8          13.3µs ± 2%      8.8µs ± 0%  -33.84%  (p=0.008 n=5+5)
Match_onepass_regex/32K-8          424µs ± 3%      280µs ± 1%  -33.93%  (p=0.008 n=5+5)
Match_onepass_regex/1M-8          13.4ms ± 0%      9.0ms ± 1%  -32.80%  (p=0.016 n=4+5)
Match_onepass_regex/32M-8          427ms ± 0%      288ms ± 1%  -32.60%  (p=0.008 n=5+5)

Change-Id: I02c54176ed5c9f5b5fc99524a2d5eb1c490f0ebf
Reviewed-on: https://go-review.googlesource.com/c/go/+/355789
Reviewed-by: Peter Weinberger <pjw@google.com>
Reviewed-by: Russ Cox <rsc@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
Auto-Submit: Russ Cox <rsc@golang.org>
This commit is contained in:
Bryan Boreham 2021-10-13 22:40:22 +01:00 committed by Gopher Robot
parent 865911424d
commit 0293c51bc5
2 changed files with 4 additions and 4 deletions

View file

@@ -163,7 +163,7 @@ func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
}
Skip:
-inst := re.prog.Inst[pc]
+inst := &re.prog.Inst[pc]
switch inst.Op {
default:

View file

@@ -427,7 +427,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
flag = i.context(pos)
}
pc := re.onepass.Start
-inst := re.onepass.Inst[pc]
+inst := &re.onepass.Inst[pc]
// If there is a simple literal prefix, skip over it.
if pos == 0 && flag.match(syntax.EmptyOp(inst.Arg)) &&
len(re.prefix) > 0 && i.canCheckPrefix() {
@@ -442,7 +442,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
pc = int(re.prefixEnd)
}
for {
-inst = re.onepass.Inst[pc]
+inst = &re.onepass.Inst[pc]
pc = int(inst.Out)
switch inst.Op {
default:
@@ -470,7 +470,7 @@ func (re *Regexp) doOnePass(ir io.RuneReader, ib []byte, is string, pos, ncap in
}
// peek at the input rune to see which branch of the Alt to take
case syntax.InstAlt, syntax.InstAltMatch:
-pc = int(onePassNext(&inst, r))
+pc = int(onePassNext(inst, r))
continue
case syntax.InstFail:
goto Return