diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go index 9ab199ceb6..85d4f9fd76 100644 --- a/src/pkg/bytes/bytes.go +++ b/src/pkg/bytes/bytes.go @@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool { } // Map returns a copy of the byte array s with all its characters modified -// according to the mapping function. +// according to the mapping function. If mapping returns a negative value, the character is +// dropped from the string with no replacement. func Map(mapping func(rune int) int, s []byte) []byte { // In the worst case, the array can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's @@ -222,16 +223,18 @@ func Map(mapping func(rune int) int, s []byte) []byte { rune, wid = utf8.DecodeRune(s[i:]) } rune = mapping(rune); - if nbytes+utf8.RuneLen(rune) > maxbytes { - // Grow the buffer. - maxbytes = maxbytes*2 + utf8.UTFMax; - nb := make([]byte, maxbytes); - for i, c := range b[0:nbytes] { - nb[i] = c + if rune >= 0 { + if nbytes+utf8.RuneLen(rune) > maxbytes { + // Grow the buffer. + maxbytes = maxbytes*2 + utf8.UTFMax; + nb := make([]byte, maxbytes); + for i, c := range b[0:nbytes] { + nb[i] = c + } + b = nb; } - b = nb; + nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); } - nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); i += wid; } return b[0:nbytes]; diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go index 553ceb7c5a..3f77e6e9ff 100644 --- a/src/pkg/bytes/bytes_test.go +++ b/src/pkg/bytes/bytes_test.go @@ -365,6 +365,19 @@ func TestMap(t *testing.T) { if string(m) != expect { t.Errorf("rot13: expected %q got %q", expect, m) } + + // 5. Drop + dropNotLatin := func(rune int) int { + if unicode.Is(unicode.Latin, rune) { + return rune + } + return -1; + }; + m = Map(dropNotLatin, Bytes("Hello, 세계")); + expect = "Hello"; + if string(m) != expect { + t.Errorf("drop: expected %q got %q", expect, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 7be98e6c10..4e375b4d5b 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool { } // Map returns a copy of the string s with all its characters modified -// according to the mapping function. +// according to the mapping function. If mapping returns a negative value, the character is +// dropped from the string with no replacement. func Map(mapping func(rune int) int, s string) string { // In the worst case, the string can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's @@ -188,20 +189,22 @@ func Map(mapping func(rune int) int, s string) string { b := make([]byte, maxbytes); for _, c := range s { rune := mapping(c); - wid := 1; - if rune >= utf8.RuneSelf { - wid = utf8.RuneLen(rune) - } - if nbytes+wid > maxbytes { - // Grow the buffer. - maxbytes = maxbytes*2 + utf8.UTFMax; - nb := make([]byte, maxbytes); - for i, c := range b[0:nbytes] { - nb[i] = c + if rune >= 0 { + wid := 1; + if rune >= utf8.RuneSelf { + wid = utf8.RuneLen(rune) } - b = nb; + if nbytes+wid > maxbytes { + // Grow the buffer. + maxbytes = maxbytes*2 + utf8.UTFMax; + nb := make([]byte, maxbytes); + for i, c := range b[0:nbytes] { + nb[i] = c + } + b = nb; + } + nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); } - nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]); } return string(b[0:nbytes]); } diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index ce77c5c2f2..e3e7f38aed 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -279,6 +279,19 @@ func TestMap(t *testing.T) { if m != expect { t.Errorf("rot13: expected %q got %q", expect, m) } + + // 5. Drop + dropNotLatin := func(rune int) int { + if unicode.Is(unicode.Latin, rune) { + return rune + } + return -1; + }; + m = Map(dropNotLatin, "Hello, 세계"); + expect = "Hello"; + if m != expect { + t.Errorf("drop: expected %q got %q", expect, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } diff --git a/src/pkg/xml/read.go b/src/pkg/xml/read.go index 5685787230..d8ee78a123 100644 --- a/src/pkg/xml/read.go +++ b/src/pkg/xml/read.go @@ -10,6 +10,7 @@ import ( "os"; "reflect"; "strings"; + "unicode"; ) // BUG(rsc): Mapping between XML elements and data structures is inherently flawed: @@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error { return p.unmarshal(v.Elem(), start); } +// fieldName strips invalid characters from an XML name +// to create a valid Go struct name. It also converts the +// name to lower case letters. +func fieldName(original string) string { + return strings.Map( + func(x int) int { + if unicode.IsDigit(x) || unicode.IsLetter(x) { + return unicode.ToLower(x) + } + return -1; + }, + original) +} + // Unmarshal a single XML element into val. func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { // Find start element if we need it. @@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error { val := ""; k := strings.ToLower(f.Name); for _, a := range start.Attr { - if strings.ToLower(a.Name.Local) == k { + if fieldName(a.Name.Local) == k { val = a.Value; break; } @@ -303,7 +318,7 @@ Loop: // Look up by tag name. // If that fails, fall back to mop-up field named "Any". if sv != nil { - k := strings.ToLower(t.Name.Local); + k := fieldName(t.Name.Local); any := -1; for i, n := 0, styp.NumField(); i < n; i++ { f := styp.Field(i); diff --git a/src/pkg/xml/read_test.go b/src/pkg/xml/read_test.go index 14ad11a318..ca9b30d9b7 100644 --- a/src/pkg/xml/read_test.go +++ b/src/pkg/xml/read_test.go @@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) { // hget http://codereview.appspot.com/rss/mine/rsc const rssFeedString = ` -Code Review - My issueshttp://codereview.appspot.com/2009-10-04T01:35:58+00:00rietveldrietveld: an attempt at pubsubhubbub -2009-10-04T01:35:58+00:00email-address-removedurn:md5:134d9179c41f806be79b3a5f7877d19a +Code Review - My issueshttp://codereview.appspot.com/2009-10-04T01:35:58+00:00rietveldrietveld: an attempt at pubsubhubbub +2009-10-04T01:35:58+00:00email-address-removedurn:md5:134d9179c41f806be79b3a5f7877d19a An attempt at adding pubsubhubbub support to Rietveld. http://code.google.com/p/pubsubhubbub http://code.google.com/p/rietveld/issues/detail?id=155 @@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py. }, }, } + +type FieldNameTest struct { + in, out string; +} + +var FieldNameTests = []FieldNameTest{ + FieldNameTest{"Profile-Image", "profileimage"}, + FieldNameTest{"_score", "score"}, +} + +func TestFieldName(t *testing.T) { + for _, tt := range FieldNameTests { + a := fieldName(tt.in); + if a != tt.out { + t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out) + } + } +}