From e6ebbefaf848604c8df3e2a58e146948b03e608b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 14 Nov 2022 12:02:23 -0800 Subject: [PATCH] net/url, net/http/httputil: accept invalid percent encodings Per https://url.spec.whatwg.org/#percent-encoded-bytes an invalid percent encoding should be handled as ordinary text. Fixes #56732 Change-Id: Ib0259dfd704922905289eebaacbf722e28f6d636 Reviewed-on: https://go-review.googlesource.com/c/go/+/450375 Run-TryBot: Ian Lance Taylor Reviewed-by: Damien Neil Reviewed-by: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Run-TryBot: Ian Lance Taylor TryBot-Result: Gopher Robot --- src/net/http/httputil/reverseproxy.go | 27 +-------------- src/net/http/httputil/reverseproxy_test.go | 2 +- src/net/url/url.go | 26 ++++++++++----- src/net/url/url_test.go | 39 +++++++++------------- 4 files changed, 35 insertions(+), 59 deletions(-) diff --git a/src/net/http/httputil/reverseproxy.go b/src/net/http/httputil/reverseproxy.go index 190279ca00..ad0221ff33 100644 --- a/src/net/http/httputil/reverseproxy.go +++ b/src/net/http/httputil/reverseproxy.go @@ -816,34 +816,9 @@ func (c switchProtocolCopier) copyToBackend(errc chan<- error) { } func cleanQueryParams(s string) string { - reencode := func(s string) string { + if strings.Contains(s, ";") { v, _ := url.ParseQuery(s) return v.Encode() } - for i := 0; i < len(s); { - switch s[i] { - case ';': - return reencode(s) - case '%': - if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { - return reencode(s) - } - i += 3 - default: - i++ - } - } return s } - -func ishex(c byte) bool { - switch { - case '0' <= c && c <= '9': - return true - case 'a' <= c && c <= 'f': - return true - case 'A' <= c && c <= 'F': - return true - } - return false -} diff --git a/src/net/http/httputil/reverseproxy_test.go b/src/net/http/httputil/reverseproxy_test.go index 5b882d3a45..5a0237494c 100644 --- a/src/net/http/httputil/reverseproxy_test.go +++ b/src/net/http/httputil/reverseproxy_test.go @@ -1831,7 +1831,7 @@ func testReverseProxyQueryParameterSmuggling(t *testing.T, wantCleanQuery bool, cleanQuery: "a=1", }, { rawQuery: "a=1&a=%zz&b=3", - cleanQuery: "a=1&b=3", + cleanQuery: "a=1&a=%zz&b=3", }} { res, err := frontend.Client().Get(frontend.URL + "?" + test.rawQuery) if err != nil { diff --git a/src/net/url/url.go b/src/net/url/url.go index d530a50d40..e959ed4ee6 100644 --- a/src/net/url/url.go +++ b/src/net/url/url.go @@ -198,20 +198,24 @@ func PathUnescape(s string) (string, error) { // unescape unescapes a string; the mode specifies // which section of the URL string is being unescaped. func unescape(s string, mode encoding) (string, error) { + isPercentEscape := func(s string, i int) bool { + return i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]) + } + // Count %, check that they're well-formed. n := 0 hasPlus := false for i := 0; i < len(s); { switch s[i] { case '%': - n++ - if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { - s = s[i:] - if len(s) > 3 { - s = s[:3] - } - return "", EscapeError(s) + if !isPercentEscape(s, i) { + // https://url.spec.whatwg.org/#percent-encoded-bytes + // says that % followed by non-hex characters + // should be accepted with no error. + i++ + continue } + n++ // Per https://tools.ietf.org/html/rfc3986#page-21 // in the host component %-encoding can only be used // for non-ASCII bytes. @@ -255,8 +259,12 @@ func unescape(s string, mode encoding) (string, error) { for i := 0; i < len(s); i++ { switch s[i] { case '%': - t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) - i += 2 + if !isPercentEscape(s, i) { + t.WriteByte('%') + } else { + t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) + i += 2 + } case '+': if mode == encodeQueryComponent { t.WriteByte(' ') diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go index 577cf631c8..899ec99e43 100644 --- a/src/net/url/url_test.go +++ b/src/net/url/url_test.go @@ -704,9 +704,11 @@ var parseRequestURLTests = []struct { // These two cases are valid as textual representations as // described in RFC 4007, but are not valid as address // literals with IPv6 zone identifiers in URIs as described in - // RFC 6874. - {"http://[fe80::1%en0]/", false}, - {"http://[fe80::1%en0]:8080/", false}, + // RFC 6874. However, this seems to be overridden by + // https://url.spec.whatwg.org/#percent-encoded-bytes + // which permits unencoded % characters. + {"http://[fe80::1%en0]/", true}, + {"http://[fe80::1%en0]:8080/", true}, } func TestParseRequestURI(t *testing.T) { @@ -896,28 +898,28 @@ var unescapeTests = []EscapeTest{ }, { "%", // not enough characters after % - "", - EscapeError("%"), + "%", + nil, }, { "%a", // not enough characters after % - "", - EscapeError("%a"), + "%a", + nil, }, { "%1", // not enough characters after % - "", - EscapeError("%1"), + "%1", + nil, }, { "123%45%6", // not enough characters after % - "", - EscapeError("%6"), + "123E%6", + nil, }, { "%zzzzz", // invalid hex digits - "", - EscapeError("%zz"), + "%zzzzz", + nil, }, { "a+b", @@ -1591,16 +1593,6 @@ func TestRequestURI(t *testing.T) { } } -func TestParseFailure(t *testing.T) { - // Test that the first parse error is returned. - const url = "%gh&%ij" - _, err := ParseQuery(url) - errStr := fmt.Sprint(err) - if !strings.Contains(errStr, "%gh") { - t.Errorf(`ParseQuery(%q) returned error %q, want something containing %q"`, url, errStr, "%gh") - } -} - func TestParseErrors(t *testing.T) { tests := []struct { in string @@ -2118,6 +2110,7 @@ func TestJoinPath(t *testing.T) { { base: "http://[fe80::1%en0]:8080/", elem: []string{"/go"}, + out: "http://[fe80::1%25en0]:8080/go", }, { base: "https://go.googlesource.com",