encoding/csv: port the go-fuzz function to native fuzzing

Beyond the required file move and refactor to use the testing package,
a number of changes were made to get the fuzzing working properly.

First, add more logs to see what is going on.

Second, some option combinations set Comma to the null character,
which simply never worked at all. I suspect the author meant to leave
the comma character as the default instead.
This was spotted thanks to the added logging.

Third, the round-trip DeepEqual check did not work at all
when any comments were involved, as the writer does not support them.

Fourth and last, massage the first and second parsed records before
comparing them with DeepEqual, as the nature of Reader and Writer
causes empty quoted records and CRLF sequences to change.

With all the changes above, the fuzzing function appears to work
normally on my laptop now. I fuzzed for a solid five minutes and
could no longer encounter any errors or panics.

Change-Id: Ie27f65f66099bdaa076343cee18b480803d2e4d3
Reviewed-on: https://go-review.googlesource.com/c/go/+/576375
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Joseph Tsai <joetsai@digital-static.net>
Reviewed-by: Ian Lance Taylor <iant@google.com>
This commit is contained in:
Daniel Martí 2024-04-04 13:05:13 +09:00
parent fd999fda59
commit 2e064cf144
2 changed files with 96 additions and 70 deletions

View file

@ -1,70 +0,0 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build gofuzz
package csv
import (
"bytes"
"fmt"
"reflect"
)
func Fuzz(data []byte) int {
score := 0
buf := new(bytes.Buffer)
for _, tt := range []Reader{
{},
{Comma: ';'},
{Comma: '\t'},
{LazyQuotes: true},
{TrimLeadingSpace: true},
{Comment: '#'},
{Comment: ';'},
} {
r := NewReader(bytes.NewReader(data))
r.Comma = tt.Comma
r.Comment = tt.Comment
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
records, err := r.ReadAll()
if err != nil {
continue
}
score = 1
buf.Reset()
w := NewWriter(buf)
w.Comma = tt.Comma
err = w.WriteAll(records)
if err != nil {
fmt.Printf("writer = %#v\n", w)
fmt.Printf("records = %v\n", records)
panic(err)
}
r = NewReader(buf)
r.Comma = tt.Comma
r.Comment = tt.Comment
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
result, err := r.ReadAll()
if err != nil {
fmt.Printf("reader = %#v\n", r)
fmt.Printf("records = %v\n", records)
panic(err)
}
if !reflect.DeepEqual(records, result) {
fmt.Println("records = \n", records)
fmt.Println("result = \n", records)
panic("not equal")
}
}
return score
}

View file

@ -0,0 +1,96 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package csv
import (
"bytes"
"reflect"
"slices"
"strings"
"testing"
)
func FuzzRoundtrip(f *testing.F) {
f.Fuzz(func(t *testing.T, in []byte) {
buf := new(bytes.Buffer)
t.Logf("input = %q", in)
for _, tt := range []Reader{
{Comma: ','},
{Comma: ';'},
{Comma: '\t'},
{Comma: ',', LazyQuotes: true},
{Comma: ',', TrimLeadingSpace: true},
{Comma: ',', Comment: '#'},
{Comma: ',', Comment: ';'},
} {
t.Logf("With options:")
t.Logf(" Comma = %q", tt.Comma)
t.Logf(" LazyQuotes = %t", tt.LazyQuotes)
t.Logf(" TrimLeadingSpace = %t", tt.TrimLeadingSpace)
t.Logf(" Comment = %q", tt.Comment)
r := NewReader(bytes.NewReader(in))
r.Comma = tt.Comma
r.Comment = tt.Comment
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
records, err := r.ReadAll()
if err != nil {
continue
}
t.Logf("first records = %#v", records)
buf.Reset()
w := NewWriter(buf)
w.Comma = tt.Comma
err = w.WriteAll(records)
if err != nil {
t.Logf("writer = %#v\n", w)
t.Logf("records = %v\n", records)
t.Fatal(err)
}
if tt.Comment != 0 {
// Writer doesn't support comments, so it can turn the quoted record "#"
// into a non-quoted comment line, failing the roundtrip check below.
continue
}
t.Logf("second input = %q", buf.Bytes())
r = NewReader(buf)
r.Comma = tt.Comma
r.Comment = tt.Comment
r.LazyQuotes = tt.LazyQuotes
r.TrimLeadingSpace = tt.TrimLeadingSpace
result, err := r.ReadAll()
if err != nil {
t.Logf("reader = %#v\n", r)
t.Logf("records = %v\n", records)
t.Fatal(err)
}
// The reader turns \r\n into \n.
for _, record := range records {
for i, s := range record {
record[i] = strings.ReplaceAll(s, "\r\n", "\n")
}
}
// Note that the reader parses the quoted record "" as an empty string,
// and the writer turns that into an empty line, which the reader skips over.
// Filter those out to avoid false positives.
records = slices.DeleteFunc(records, func(record []string) bool {
return len(record) == 1 && record[0] == ""
})
// The reader uses nil when returning no records at all.
if len(records) == 0 {
records = nil
}
if !reflect.DeepEqual(records, result) {
t.Fatalf("first read got %#v, second got %#v", records, result)
}
}
})
}