regexp: interpret all Go character escapes \a \b \f \n \r \t \v

R=rsc
CC=golang-dev
https://golang.org/cl/2042044
This commit is contained in:
Rob Pike 2010-08-30 14:06:59 +10:00
parent 9e162aa3be
commit ca3b5222eb
2 changed files with 20 additions and 5 deletions

View file

@ -57,6 +57,8 @@ var findTests = []FindTest{
FindTest{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
FindTest{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
FindTest{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
FindTest{`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
FindTest{`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
FindTest{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
FindTest{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},

View file

@ -22,7 +22,8 @@
// character [ '-' character ]
//
// All characters are UTF-8-encoded code points. Backslashes escape special
// characters, including inside character classes.
// characters, including inside character classes. The standard Go character
// escapes are also recognized: \a \b \f \n \r \t \v.
//
// There are 16 methods of Regexp that match a regular expression and identify
// the matched text. Their names are matched by this regular expression:
@ -353,6 +354,18 @@ func ispunct(c int) bool {
return false
}
// escapes holds the letters that, following a backslash, name a Go character
// escape; escaped holds the byte each letter stands for, at the same index.
var (
	escapes = []byte("abfnrtv")
	escaped = []byte("\a\b\f\n\r\t\v")
)

// escape reports the position of c within escapes, or -1 when c is not one of
// the escape letters. A non-negative result is a valid index into escaped.
func escape(c int) int {
	for idx := 0; idx < len(escapes); idx++ {
		// Compare as int so that values outside the byte range never match.
		if c == int(escapes[idx]) {
			return idx
		}
	}
	return -1
}
func (p *parser) charClass() instr {
cc := newCharClass()
if p.c() == '^' {
@ -388,10 +401,10 @@ func (p *parser) charClass() instr {
switch {
case c == endOfFile:
p.error(ErrExtraneousBackslash)
case c == 'n':
c = '\n'
case ispunct(c):
// c is as delivered
case escape(c) >= 0:
c = int(escaped[escape(c)])
default:
p.error(ErrBadBackslash)
}
@ -483,10 +496,10 @@ func (p *parser) term() (start, end instr) {
switch {
case c == endOfFile:
p.error(ErrExtraneousBackslash)
case c == 'n':
c = '\n'
case ispunct(c):
// c is as delivered
case escape(c) >= 0:
c = int(escaped[escape(c)])
default:
p.error(ErrBadBackslash)
}