net/url, net/http/httputil: accept invalid percent encodings

Per https://url.spec.whatwg.org/#percent-encoded-bytes an invalid
percent encoding should be handled as ordinary text.

Fixes #56732

Change-Id: Ib0259dfd704922905289eebaacbf722e28f6d636
Reviewed-on: https://go-review.googlesource.com/c/go/+/450375
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Ian Lance Taylor 2022-11-14 12:02:23 -08:00 committed by Gopher Robot
parent 2b59307ac2
commit e6ebbefaf8
4 changed files with 35 additions and 59 deletions

View file

@ -816,34 +816,9 @@ func (c switchProtocolCopier) copyToBackend(errc chan<- error) {
} }
func cleanQueryParams(s string) string { func cleanQueryParams(s string) string {
reencode := func(s string) string { if strings.Contains(s, ";") {
v, _ := url.ParseQuery(s) v, _ := url.ParseQuery(s)
return v.Encode() return v.Encode()
} }
for i := 0; i < len(s); {
switch s[i] {
case ';':
return reencode(s)
case '%':
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
return reencode(s)
}
i += 3
default:
i++
}
}
return s return s
} }
func ishex(c byte) bool {
switch {
case '0' <= c && c <= '9':
return true
case 'a' <= c && c <= 'f':
return true
case 'A' <= c && c <= 'F':
return true
}
return false
}

View file

@ -1831,7 +1831,7 @@ func testReverseProxyQueryParameterSmuggling(t *testing.T, wantCleanQuery bool,
cleanQuery: "a=1", cleanQuery: "a=1",
}, { }, {
rawQuery: "a=1&a=%zz&b=3", rawQuery: "a=1&a=%zz&b=3",
cleanQuery: "a=1&b=3", cleanQuery: "a=1&a=%zz&b=3",
}} { }} {
res, err := frontend.Client().Get(frontend.URL + "?" + test.rawQuery) res, err := frontend.Client().Get(frontend.URL + "?" + test.rawQuery)
if err != nil { if err != nil {

View file

@ -198,20 +198,24 @@ func PathUnescape(s string) (string, error) {
// unescape unescapes a string; the mode specifies // unescape unescapes a string; the mode specifies
// which section of the URL string is being unescaped. // which section of the URL string is being unescaped.
func unescape(s string, mode encoding) (string, error) { func unescape(s string, mode encoding) (string, error) {
isPercentEscape := func(s string, i int) bool {
return i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2])
}
// Count %, check that they're well-formed. // Count %, check that they're well-formed.
n := 0 n := 0
hasPlus := false hasPlus := false
for i := 0; i < len(s); { for i := 0; i < len(s); {
switch s[i] { switch s[i] {
case '%': case '%':
n++ if !isPercentEscape(s, i) {
if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { // https://url.spec.whatwg.org/#percent-encoded-bytes
s = s[i:] // says that % followed by non-hex characters
if len(s) > 3 { // should be accepted with no error.
s = s[:3] i++
} continue
return "", EscapeError(s)
} }
n++
// Per https://tools.ietf.org/html/rfc3986#page-21 // Per https://tools.ietf.org/html/rfc3986#page-21
// in the host component %-encoding can only be used // in the host component %-encoding can only be used
// for non-ASCII bytes. // for non-ASCII bytes.
@ -255,8 +259,12 @@ func unescape(s string, mode encoding) (string, error) {
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
switch s[i] { switch s[i] {
case '%': case '%':
t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) if !isPercentEscape(s, i) {
i += 2 t.WriteByte('%')
} else {
t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2]))
i += 2
}
case '+': case '+':
if mode == encodeQueryComponent { if mode == encodeQueryComponent {
t.WriteByte(' ') t.WriteByte(' ')

View file

@ -704,9 +704,11 @@ var parseRequestURLTests = []struct {
// These two cases are valid as textual representations as // These two cases are valid as textual representations as
// described in RFC 4007, but are not valid as address // described in RFC 4007, but are not valid as address
// literals with IPv6 zone identifiers in URIs as described in // literals with IPv6 zone identifiers in URIs as described in
// RFC 6874. // RFC 6874. However, this seems to be overridden by
{"http://[fe80::1%en0]/", false}, // https://url.spec.whatwg.org/#percent-encoded-bytes
{"http://[fe80::1%en0]:8080/", false}, // which permits unencoded % characters.
{"http://[fe80::1%en0]/", true},
{"http://[fe80::1%en0]:8080/", true},
} }
func TestParseRequestURI(t *testing.T) { func TestParseRequestURI(t *testing.T) {
@ -896,28 +898,28 @@ var unescapeTests = []EscapeTest{
}, },
{ {
"%", // not enough characters after % "%", // not enough characters after %
"", "%",
EscapeError("%"), nil,
}, },
{ {
"%a", // not enough characters after % "%a", // not enough characters after %
"", "%a",
EscapeError("%a"), nil,
}, },
{ {
"%1", // not enough characters after % "%1", // not enough characters after %
"", "%1",
EscapeError("%1"), nil,
}, },
{ {
"123%45%6", // not enough characters after % "123%45%6", // not enough characters after %
"", "123E%6",
EscapeError("%6"), nil,
}, },
{ {
"%zzzzz", // invalid hex digits "%zzzzz", // invalid hex digits
"", "%zzzzz",
EscapeError("%zz"), nil,
}, },
{ {
"a+b", "a+b",
@ -1591,16 +1593,6 @@ func TestRequestURI(t *testing.T) {
} }
} }
func TestParseFailure(t *testing.T) {
// Test that the first parse error is returned.
const url = "%gh&%ij"
_, err := ParseQuery(url)
errStr := fmt.Sprint(err)
if !strings.Contains(errStr, "%gh") {
t.Errorf(`ParseQuery(%q) returned error %q, want something containing %q"`, url, errStr, "%gh")
}
}
func TestParseErrors(t *testing.T) { func TestParseErrors(t *testing.T) {
tests := []struct { tests := []struct {
in string in string
@ -2118,6 +2110,7 @@ func TestJoinPath(t *testing.T) {
{ {
base: "http://[fe80::1%en0]:8080/", base: "http://[fe80::1%en0]:8080/",
elem: []string{"/go"}, elem: []string{"/go"},
out: "http://[fe80::1%25en0]:8080/go",
}, },
{ {
base: "https://go.googlesource.com", base: "https://go.googlesource.com",