text/tabwriter: reduce allocations from tracking cells

The tabwriter tracks cells on a line-by-line basis.
This can be memory-hungry when working with large input.

This change adds two optimizations.

First, when there's an existing cell slice for a line,
don't overwrite it by appending.
This helps when re-using a Writer,
or when the output is broken into groups,
e.g. by a blank line.
We now re-use that existing cell slice.

Second, we predict that the number of cells in a line
will probably match those of the previous line,
since tabwriter is most often used to format tables.

This has a noticeable impact on cmd/objdump (#24725).
It reduces allocated space by about 55%.
It also speeds it up some.
Using "benchcmd -n 10 Objdump go tool objdump `which go`":

name            old time/op       new time/op       delta
ObjdumpCompile        9.03s ± 1%        8.51s ± 1%  -5.81%  (p=0.000 n=10+10)

It might also imaginably speed up gofmt on some
large machine-generated code.

name                old time/op    new time/op    delta
Table/1x10/new-8            2.89µs ± 1%    2.39µs ± 1%   -17.39%  (p=0.000 n=13+14)
Table/1x10/reuse-8          2.13µs ± 1%    1.29µs ± 2%   -39.58%  (p=0.000 n=14+15)
Table/1x1000/new-8           203µs ± 0%     147µs ± 1%   -27.45%  (p=0.000 n=13+14)
Table/1x1000/reuse-8         194µs ± 1%     113µs ± 2%   -42.01%  (p=0.000 n=14+15)
Table/1x100000/new-8        33.1ms ± 1%    27.5ms ± 2%   -17.08%  (p=0.000 n=15+15)
Table/1x100000/reuse-8      22.0ms ± 3%    11.8ms ± 1%   -46.23%  (p=0.000 n=14+12)
Table/10x10/new-8           8.51µs ± 0%    6.52µs ± 1%   -23.48%  (p=0.000 n=13+15)
Table/10x10/reuse-8         7.41µs ± 0%    4.59µs ± 3%   -38.03%  (p=0.000 n=14+15)
Table/10x1000/new-8          749µs ± 0%     521µs ± 1%   -30.39%  (p=0.000 n=12+15)
Table/10x1000/reuse-8        732µs ± 1%     448µs ± 2%   -38.79%  (p=0.000 n=15+14)
Table/10x100000/new-8        102ms ± 2%      74ms ± 2%   -28.05%  (p=0.000 n=14+15)
Table/10x100000/reuse-8     96.2ms ± 4%    55.4ms ± 3%   -42.36%  (p=0.000 n=15+15)
Table/100x10/new-8          50.3µs ± 1%    43.3µs ± 1%   -13.87%  (p=0.000 n=14+15)
Table/100x10/reuse-8        47.6µs ± 1%    36.1µs ± 1%   -24.09%  (p=0.000 n=14+14)
Table/100x1000/new-8        5.17ms ± 1%    4.11ms ± 1%   -20.40%  (p=0.000 n=14+13)
Table/100x1000/reuse-8      5.00ms ± 1%    3.73ms ± 1%   -25.46%  (p=0.000 n=14+14)
Table/100x100000/new-8       654ms ± 2%     531ms ± 2%   -18.86%  (p=0.000 n=13+14)
Table/100x100000/reuse-8     709ms ± 1%     505ms ± 2%   -28.77%  (p=0.000 n=12+15)
Pyramid/10-8                4.22µs ± 1%    4.21µs ± 1%      ~     (p=0.067 n=14+14)
Pyramid/100-8                378µs ± 0%     378µs ± 0%    +0.17%  (p=0.022 n=13+13)
Pyramid/1000-8               133ms ± 3%     132ms ± 3%      ~     (p=0.148 n=15+15)
Ragged/10-8                 6.10µs ± 0%    5.16µs ± 0%   -15.38%  (p=0.000 n=14+15)
Ragged/100-8                54.5µs ± 0%    43.8µs ± 0%   -19.59%  (p=0.000 n=14+15)
Ragged/1000-8                532µs ± 0%     424µs ± 0%   -20.25%  (p=0.000 n=14+14)

name                old alloc/op   new alloc/op   delta
Table/1x10/new-8            1.76kB ± 0%    1.52kB ± 0%   -13.64%  (p=0.000 n=15+15)
Table/1x10/reuse-8            800B ± 0%        0B       -100.00%  (p=0.000 n=15+15)
Table/1x1000/new-8           131kB ± 0%      99kB ± 0%   -24.30%  (p=0.000 n=15+15)
Table/1x1000/reuse-8        80.0kB ± 0%     0.0kB ± 0%   -99.99%  (p=0.000 n=15+15)
Table/1x100000/new-8        23.1MB ± 0%    19.9MB ± 0%   -13.85%  (p=0.000 n=15+15)
Table/1x100000/reuse-8      8.30MB ± 0%    0.20MB ± 0%   -97.60%  (p=0.000 n=13+12)
Table/10x10/new-8           8.94kB ± 0%    5.06kB ± 0%   -43.47%  (p=0.000 n=15+15)
Table/10x10/reuse-8         7.52kB ± 0%    0.00kB       -100.00%  (p=0.000 n=15+15)
Table/10x1000/new-8          850kB ± 0%     387kB ± 0%   -54.50%  (p=0.000 n=13+15)
Table/10x1000/reuse-8        752kB ± 0%       0kB ± 0%   -99.98%  (p=0.000 n=13+15)
Table/10x100000/new-8       95.7MB ± 0%    49.3MB ± 0%   -48.50%  (p=0.000 n=14+15)
Table/10x100000/reuse-8     76.2MB ± 0%     2.5MB ± 0%   -96.77%  (p=0.000 n=13+15)
Table/100x10/new-8          66.3kB ± 0%    38.0kB ± 0%   -42.65%  (p=0.000 n=15+15)
Table/100x10/reuse-8        61.3kB ± 0%     0.0kB       -100.00%  (p=0.000 n=15+15)
Table/100x1000/new-8        6.69MB ± 0%    3.25MB ± 0%   -51.37%  (p=0.000 n=15+15)
Table/100x1000/reuse-8      6.13MB ± 0%    0.01MB ± 0%   -99.89%  (p=0.000 n=15+15)
Table/100x100000/new-8       684MB ± 0%     340MB ± 0%   -50.29%  (p=0.000 n=14+15)
Table/100x100000/reuse-8     648MB ± 0%     170MB ± 0%   -73.78%  (p=0.000 n=14+13)
Pyramid/10-8                4.40kB ± 0%    4.40kB ± 0%      ~     (all equal)
Pyramid/100-8                652kB ± 0%     652kB ± 0%      ~     (p=0.715 n=15+15)
Pyramid/1000-8              96.7MB ± 0%    96.7MB ± 0%      ~     (p=0.084 n=15+14)
Ragged/10-8                 5.17kB ± 0%    4.51kB ± 0%   -12.69%  (p=0.000 n=15+15)
Ragged/100-8                50.2kB ± 0%    41.1kB ± 0%   -18.04%  (p=0.000 n=15+15)
Ragged/1000-8                492kB ± 0%     401kB ± 0%   -18.61%  (p=0.000 n=15+15)

name                old allocs/op  new allocs/op  delta
Table/1x10/new-8              29.0 ± 0%      21.0 ± 0%   -27.59%  (p=0.000 n=15+15)
Table/1x10/reuse-8            20.0 ± 0%       0.0       -100.00%  (p=0.000 n=15+15)
Table/1x1000/new-8           2.02k ± 0%     1.02k ± 0%   -49.38%  (p=0.000 n=15+15)
Table/1x1000/reuse-8         2.00k ± 0%     0.00k       -100.00%  (p=0.000 n=15+15)
Table/1x100000/new-8          200k ± 0%      100k ± 0%   -49.98%  (p=0.000 n=15+15)
Table/1x100000/reuse-8        200k ± 0%        1k ± 0%   -99.50%  (p=0.000 n=14+15)
Table/10x10/new-8             66.0 ± 0%      31.0 ± 0%   -53.03%  (p=0.000 n=15+15)
Table/10x10/reuse-8           50.0 ± 0%       0.0       -100.00%  (p=0.000 n=15+15)
Table/10x1000/new-8          5.03k ± 0%     1.04k ± 0%   -79.36%  (p=0.000 n=15+15)
Table/10x1000/reuse-8        5.00k ± 0%     0.00k       -100.00%  (p=0.000 n=15+15)
Table/10x100000/new-8         500k ± 0%      100k ± 0%   -79.99%  (p=0.000 n=15+15)
Table/10x100000/reuse-8       500k ± 0%        5k ± 0%   -99.00%  (p=0.000 n=15+15)
Table/100x10/new-8             102 ± 0%        40 ± 0%   -60.78%  (p=0.000 n=15+15)
Table/100x10/reuse-8          80.0 ± 0%       0.0       -100.00%  (p=0.000 n=15+15)
Table/100x1000/new-8         8.04k ± 0%     1.05k ± 0%   -86.91%  (p=0.000 n=15+15)
Table/100x1000/reuse-8       8.00k ± 0%     0.00k ± 0%   -99.98%  (p=0.000 n=15+15)
Table/100x100000/new-8        800k ± 0%      100k ± 0%   -87.49%  (p=0.000 n=15+12)
Table/100x100000/reuse-8      800k ± 0%       50k ± 0%   -93.74%  (p=0.000 n=14+13)
Pyramid/10-8                  20.0 ± 0%      20.0 ± 0%      ~     (all equal)
Pyramid/100-8                 50.0 ± 0%      50.0 ± 0%      ~     (all equal)
Pyramid/1000-8                 109 ± 0%       109 ± 0%      ~     (all equal)
Ragged/10-8                   54.0 ± 0%      34.0 ± 0%   -37.04%  (p=0.000 n=15+15)
Ragged/100-8                   422 ± 0%       188 ± 0%   -55.45%  (p=0.000 n=15+15)
Ragged/1000-8                4.03k ± 0%     1.66k ± 0%   -58.80%  (p=0.000 n=15+15)

Change-Id: I0c0a392b02d5148a0a4b8ad4eaf98fa343980962
Reviewed-on: https://go-review.googlesource.com/106979
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Josh Bleecher Snyder 2018-04-07 17:40:57 -07:00
parent 77faa652c1
commit 9ee7662cd6
2 changed files with 100 additions and 1 deletions

View file

@ -106,7 +106,27 @@ type Writer struct {
widths []int // list of column widths in runes - re-used during formatting
}
func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
func (b *Writer) addLine() {
// Grow slice instead of appending,
// as that gives us an opportunity
// to re-use an existing []cell.
if n := len(b.lines) + 1; n <= cap(b.lines) {
b.lines = b.lines[:n]
b.lines[n-1] = b.lines[n-1][:0]
} else {
b.lines = append(b.lines, nil)
}
// The previous line is probably a good indicator
// of how many cells the current line will have.
// If the current line's capacity is smaller than that,
// abandon it and make a new one.
if n := len(b.lines); n >= 2 {
if prev := len(b.lines[n-2]); prev > cap(b.lines[n-1]) {
b.lines[n-1] = make([]cell, 0, prev)
}
}
}
// Reset the current state.
func (b *Writer) reset() {

View file

@ -5,7 +5,10 @@
package tabwriter_test
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"testing"
. "text/tabwriter"
)
@ -650,3 +653,79 @@ func TestPanicDuringWrite(t *testing.T) {
io.WriteString(w, "a\n\n") // the second \n triggers a call to w.Write and thus a panic
t.Errorf("failed to panic during Write")
}
func BenchmarkTable(b *testing.B) {
for _, w := range [...]int{1, 10, 100} {
// Build a line with w cells.
line := bytes.Repeat([]byte("a\t"), w)
line = append(line, '\n')
for _, h := range [...]int{10, 1000, 100000} {
b.Run(fmt.Sprintf("%dx%d", w, h), func(b *testing.B) {
b.Run("new", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings
// Write the line h times.
for j := 0; j < h; j++ {
w.Write(line)
}
w.Flush()
}
})
b.Run("reuse", func(b *testing.B) {
b.ReportAllocs()
w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings
for i := 0; i < b.N; i++ {
// Write the line h times.
for j := 0; j < h; j++ {
w.Write(line)
}
w.Flush()
}
})
})
}
}
}
func BenchmarkPyramid(b *testing.B) {
for _, x := range [...]int{10, 100, 1000} {
// Build a line with x cells.
line := bytes.Repeat([]byte("a\t"), x)
line = append(line, '\n')
b.Run(fmt.Sprintf("%d", x), func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings
// Write increasing prefixes of that line.
for j := 0; j < x; j++ {
w.Write(line[:j*2])
}
w.Flush()
}
})
}
}
func BenchmarkRagged(b *testing.B) {
var lines [8][]byte
for i, w := range [8]int{6, 2, 9, 5, 5, 7, 3, 8} {
// Build a line with x cells.
lines[i] = bytes.Repeat([]byte("a\t"), w)
lines[i] = append(lines[i], '\n')
}
for _, h := range [...]int{10, 100, 1000} {
b.Run(fmt.Sprintf("%d", h), func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
w := NewWriter(ioutil.Discard, 4, 4, 1, ' ', 0) // no particular reason for these settings
// Write the lines in turn h times.
for j := 0; j < h; j++ {
w.Write(lines[j%len(lines)])
}
w.Flush()
}
})
}
}