mirror of
https://github.com/golang/go
synced 2024-10-14 20:05:36 +00:00
strconv: add QuoteToGraphic and friends
This version of quoting allows runes in category Zs, such as the ideographic space characters, to be passed through unquoted. Still to do (maybe): A way to access this from Printf. Updates #11511. Change-Id: I3bae84b1aa0bc1b885318d3f67c5f451099a2a5a Reviewed-on: https://go-review.googlesource.com/14184 Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
This commit is contained in:
parent
9ac0fff70a
commit
74288f09dc
|
@ -635,3 +635,23 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
|
|||
0xf57a,
|
||||
0xf5a4,
|
||||
}
|
||||
|
||||
// isGraphic lists the graphic runes not matched by IsPrint.
|
||||
var isGraphic = []uint16{
|
||||
0x00a0,
|
||||
0x1680,
|
||||
0x2000,
|
||||
0x2001,
|
||||
0x2002,
|
||||
0x2003,
|
||||
0x2004,
|
||||
0x2005,
|
||||
0x2006,
|
||||
0x2007,
|
||||
0x2008,
|
||||
0x2009,
|
||||
0x200a,
|
||||
0x202f,
|
||||
0x205f,
|
||||
0x3000,
|
||||
}
|
||||
|
|
|
@ -174,6 +174,23 @@ func main() {
|
|||
}
|
||||
fmt.Fprintf(&buf, "\t%#04x,\n", r-0x10000)
|
||||
}
|
||||
fmt.Fprintf(&buf, "}\n\n")
|
||||
|
||||
// The list of graphic but not "printable" runes is short. Just make one easy table.
|
||||
fmt.Fprintf(&buf, "// isGraphic lists the graphic runes not matched by IsPrint.\n")
|
||||
fmt.Fprintf(&buf, "var isGraphic = []uint16{\n")
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
if unicode.IsPrint(r) != unicode.IsGraphic(r) {
|
||||
// Sanity check.
|
||||
if !unicode.IsGraphic(r) {
|
||||
log.Fatalf("%U is printable but not graphic\n", r)
|
||||
}
|
||||
if r > 0xFFFF { // We expect only 16-bit values.
|
||||
log.Fatalf("%U too big for isGraphic\n", r)
|
||||
}
|
||||
fmt.Fprintf(&buf, "\t%#04x,\n", r)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(&buf, "}\n")
|
||||
|
||||
data, err := format.Source(buf.Bytes())
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
|
||||
const lowerhex = "0123456789abcdef"
|
||||
|
||||
func quoteWith(s string, quote byte, ASCIIonly bool) string {
|
||||
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
|
||||
var runeTmp [utf8.UTFMax]byte
|
||||
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
|
||||
buf = append(buf, quote)
|
||||
|
@ -38,7 +38,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
|
|||
buf = append(buf, byte(r))
|
||||
continue
|
||||
}
|
||||
} else if IsPrint(r) {
|
||||
} else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
|
||||
n := utf8.EncodeRune(runeTmp[:], r)
|
||||
buf = append(buf, runeTmp[:n]...)
|
||||
continue
|
||||
|
@ -90,7 +90,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string {
|
|||
// control characters and non-printable characters as defined by
|
||||
// IsPrint.
|
||||
func Quote(s string) string {
|
||||
return quoteWith(s, '"', false)
|
||||
return quoteWith(s, '"', false, false)
|
||||
}
|
||||
|
||||
// AppendQuote appends a double-quoted Go string literal representing s,
|
||||
|
@ -103,7 +103,7 @@ func AppendQuote(dst []byte, s string) []byte {
|
|||
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
|
||||
// non-ASCII characters and non-printable characters as defined by IsPrint.
|
||||
func QuoteToASCII(s string) string {
|
||||
return quoteWith(s, '"', true)
|
||||
return quoteWith(s, '"', true, false)
|
||||
}
|
||||
|
||||
// AppendQuoteToASCII appends a double-quoted Go string literal representing s,
|
||||
|
@ -112,12 +112,25 @@ func AppendQuoteToASCII(dst []byte, s string) []byte {
|
|||
return append(dst, QuoteToASCII(s)...)
|
||||
}
|
||||
|
||||
// QuoteToGraphic returns a double-quoted Go string literal representing s.
|
||||
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
|
||||
// non-graphic characters as defined by IsGraphic; graphic non-ASCII characters are left unchanged.
|
||||
func QuoteToGraphic(s string) string {
|
||||
return quoteWith(s, '"', false, true)
|
||||
}
|
||||
|
||||
// AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
|
||||
// as generated by QuoteToGraphic, to dst and returns the extended buffer.
|
||||
func AppendQuoteToGraphic(dst []byte, s string) []byte {
|
||||
return append(dst, QuoteToGraphic(s)...)
|
||||
}
|
||||
|
||||
// QuoteRune returns a single-quoted Go character literal representing the
|
||||
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
|
||||
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
|
||||
// for control characters and non-printable characters as defined by IsPrint.
|
||||
func QuoteRune(r rune) string {
|
||||
// TODO: avoid the allocation here.
|
||||
return quoteWith(string(r), '\'', false)
|
||||
return quoteWith(string(r), '\'', false, false)
|
||||
}
|
||||
|
||||
// AppendQuoteRune appends a single-quoted Go character literal representing the rune,
|
||||
|
@ -127,12 +140,12 @@ func AppendQuoteRune(dst []byte, r rune) []byte {
|
|||
}
|
||||
|
||||
// QuoteRuneToASCII returns a single-quoted Go character literal representing
|
||||
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
|
||||
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
|
||||
// \u0100) for non-ASCII characters and non-printable characters as defined
|
||||
// by IsPrint.
|
||||
func QuoteRuneToASCII(r rune) string {
|
||||
// TODO: avoid the allocation here.
|
||||
return quoteWith(string(r), '\'', true)
|
||||
return quoteWith(string(r), '\'', true, false)
|
||||
}
|
||||
|
||||
// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
|
||||
|
@ -141,6 +154,21 @@ func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
|
|||
return append(dst, QuoteRuneToASCII(r)...)
|
||||
}
|
||||
|
||||
// QuoteRuneToGraphic returns a single-quoted Go character literal representing
|
||||
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
|
||||
// \u0100) for characters that are not graphic as defined
|
||||
// by IsGraphic.
|
||||
func QuoteRuneToGraphic(r rune) string {
|
||||
// TODO: avoid the allocation here.
|
||||
return quoteWith(string(r), '\'', false, true)
|
||||
}
|
||||
|
||||
// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
|
||||
// as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
|
||||
func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
|
||||
return append(dst, QuoteRuneToGraphic(r)...)
|
||||
}
|
||||
|
||||
// CanBackquote reports whether the string s can be represented
|
||||
// unchanged as a single-line backquoted string without control
|
||||
// characters other than tab.
|
||||
|
@ -453,3 +481,26 @@ func IsPrint(r rune) bool {
|
|||
j := bsearch16(isNotPrint, uint16(r))
|
||||
return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
|
||||
}
|
||||
|
||||
// IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
|
||||
// characters include letters, marks, numbers, punctuation, symbols, and
|
||||
// spaces, from categories L, M, N, P, S, and Zs.
|
||||
func IsGraphic(r rune) bool {
|
||||
if IsPrint(r) {
|
||||
return true
|
||||
}
|
||||
return isInGraphicList(r)
|
||||
}
|
||||
|
||||
// isInGraphicList reports whether the rune is in the isGraphic list. This separation
|
||||
// from IsGraphic allows quoteWith to avoid two calls to IsPrint.
|
||||
// Should be called only if IsPrint fails.
|
||||
func isInGraphicList(r rune) bool {
|
||||
// We know r must fit in 16 bits - see makeisprint.go.
|
||||
if r > 0xFFFF {
|
||||
return false
|
||||
}
|
||||
rr := uint16(r)
|
||||
i := bsearch16(isGraphic, rr)
|
||||
return i < len(isGraphic) && rr == isGraphic[i]
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ import (
|
|||
"unicode"
|
||||
)
|
||||
|
||||
// Verify that our isPrint agrees with unicode.IsPrint
|
||||
// Verify that our IsPrint agrees with unicode.IsPrint.
|
||||
func TestIsPrint(t *testing.T) {
|
||||
n := 0
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
|
@ -24,19 +24,36 @@ func TestIsPrint(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// Verify that our IsGraphic agrees with unicode.IsGraphic.
|
||||
func TestIsGraphic(t *testing.T) {
|
||||
n := 0
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
if IsGraphic(r) != unicode.IsGraphic(r) {
|
||||
t.Errorf("IsGraphic(%U)=%t incorrect", r, IsGraphic(r))
|
||||
n++
|
||||
if n > 10 {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type quoteTest struct {
|
||||
in string
|
||||
out string
|
||||
ascii string
|
||||
in string
|
||||
out string
|
||||
ascii string
|
||||
graphic string
|
||||
}
|
||||
|
||||
var quotetests = []quoteTest{
|
||||
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`},
|
||||
{"\\", `"\\"`, `"\\"`},
|
||||
{"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`},
|
||||
{"\u263a", `"☺"`, `"\u263a"`},
|
||||
{"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`},
|
||||
{"\x04", `"\x04"`, `"\x04"`},
|
||||
{"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`},
|
||||
{"\\", `"\\"`, `"\\"`, `"\\"`},
|
||||
{"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`, `"abc\xffdef"`},
|
||||
{"\u263a", `"☺"`, `"\u263a"`, `"☺"`},
|
||||
{"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`, `"\U0010ffff"`},
|
||||
{"\x04", `"\x04"`, `"\x04"`, `"\x04"`},
|
||||
// Some non-printable but graphic runes. Final column is double-quoted.
|
||||
{"!\u00a0!\u2000!\u3000!", `"!\u00a0!\u2000!\u3000!"`, `"!\u00a0!\u2000!\u3000!"`, "\"!\u00a0!\u2000!\u3000!\""},
|
||||
}
|
||||
|
||||
func TestQuote(t *testing.T) {
|
||||
|
@ -61,22 +78,38 @@ func TestQuoteToASCII(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestQuoteToGraphic(t *testing.T) {
|
||||
for _, tt := range quotetests {
|
||||
if out := QuoteToGraphic(tt.in); out != tt.graphic {
|
||||
t.Errorf("QuoteToGraphic(%s) = %s, want %s", tt.in, out, tt.graphic)
|
||||
}
|
||||
if out := AppendQuoteToGraphic([]byte("abc"), tt.in); string(out) != "abc"+tt.graphic {
|
||||
t.Errorf("AppendQuoteToGraphic(%q, %s) = %s, want %s", "abc", tt.in, out, "abc"+tt.graphic)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type quoteRuneTest struct {
|
||||
in rune
|
||||
out string
|
||||
ascii string
|
||||
in rune
|
||||
out string
|
||||
ascii string
|
||||
graphic string
|
||||
}
|
||||
|
||||
var quoterunetests = []quoteRuneTest{
|
||||
{'a', `'a'`, `'a'`},
|
||||
{'\a', `'\a'`, `'\a'`},
|
||||
{'\\', `'\\'`, `'\\'`},
|
||||
{0xFF, `'ÿ'`, `'\u00ff'`},
|
||||
{0x263a, `'☺'`, `'\u263a'`},
|
||||
{0xfffd, `'<27>'`, `'\ufffd'`},
|
||||
{0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`},
|
||||
{0x0010ffff + 1, `'<27>'`, `'\ufffd'`},
|
||||
{0x04, `'\x04'`, `'\x04'`},
|
||||
{'a', `'a'`, `'a'`, `'a'`},
|
||||
{'\a', `'\a'`, `'\a'`, `'\a'`},
|
||||
{'\\', `'\\'`, `'\\'`, `'\\'`},
|
||||
{0xFF, `'ÿ'`, `'\u00ff'`, `'ÿ'`},
|
||||
{0x263a, `'☺'`, `'\u263a'`, `'☺'`},
|
||||
{0xfffd, `'<27>'`, `'\ufffd'`, `'<27>'`},
|
||||
{0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`, `'\U0010ffff'`},
|
||||
{0x0010ffff + 1, `'<27>'`, `'\ufffd'`, `'<27>'`},
|
||||
{0x04, `'\x04'`, `'\x04'`, `'\x04'`},
|
||||
// Some differences between graphic and printable. Note the last column is double-quoted.
|
||||
{'\u00a0', `'\u00a0'`, `'\u00a0'`, "'\u00a0'"},
|
||||
{'\u2000', `'\u2000'`, `'\u2000'`, "'\u2000'"},
|
||||
{'\u3000', `'\u3000'`, `'\u3000'`, "'\u3000'"},
|
||||
}
|
||||
|
||||
func TestQuoteRune(t *testing.T) {
|
||||
|
@ -101,6 +134,17 @@ func TestQuoteRuneToASCII(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestQuoteRuneToGraphic(t *testing.T) {
|
||||
for _, tt := range quoterunetests {
|
||||
if out := QuoteRuneToGraphic(tt.in); out != tt.graphic {
|
||||
t.Errorf("QuoteRuneToGraphic(%U) = %s, want %s", tt.in, out, tt.graphic)
|
||||
}
|
||||
if out := AppendQuoteRuneToGraphic([]byte("abc"), tt.in); string(out) != "abc"+tt.graphic {
|
||||
t.Errorf("AppendQuoteRuneToGraphic(%q, %U) = %s, want %s", "abc", tt.in, out, "abc"+tt.graphic)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type canBackquoteTest struct {
|
||||
in string
|
||||
out bool
|
||||
|
|
Loading…
Reference in a new issue