Issue #19365: Optimized the parsing of long replacement string in re.sub*()

functions.
This commit is contained in:
Serhiy Storchaka 2013-10-23 22:27:52 +03:00
parent 4d397008cd
commit 9c15ec1ce6
2 changed files with 33 additions and 44 deletions

View file

@ -769,35 +769,33 @@ def parse_template(source, pattern):
# group references
s = Tokenizer(source)
sget = s.get
p = []
a = p.append
def literal(literal, p=p, pappend=a):
if p and p[-1][0] is LITERAL:
p[-1] = LITERAL, p[-1][1] + literal
else:
pappend((LITERAL, literal))
sep = source[:0]
if isinstance(sep, str):
makechar = chr
else:
makechar = chr
while 1:
groups = []
literals = []
literal = []
lappend = literal.append
def addgroup(index):
if literal:
literals.append(''.join(literal))
del literal[:]
groups.append((len(literals), index))
literals.append(None)
while True:
this = sget()
if this is None:
break # end of replacement string
if this and this[0] == "\\":
if this[0] == "\\":
# group
c = this[1:2]
c = this[1]
if c == "g":
name = ""
if s.match("<"):
while 1:
while True:
char = sget()
if char is None:
raise error("unterminated group name")
if char == ">":
break
name = name + char
name += char
if not name:
raise error("missing group name")
try:
@ -811,50 +809,38 @@ def literal(literal, p=p, pappend=a):
index = pattern.groupindex[name]
except KeyError:
raise IndexError("unknown group name")
a((MARK, index))
addgroup(index)
elif c == "0":
if s.next in OCTDIGITS:
this = this + sget()
this += sget()
if s.next in OCTDIGITS:
this = this + sget()
literal(makechar(int(this[1:], 8) & 0xff))
this += sget()
lappend(chr(int(this[1:], 8) & 0xff))
elif c in DIGITS:
isoctal = False
if s.next in DIGITS:
this = this + sget()
this += sget()
if (c in OCTDIGITS and this[2] in OCTDIGITS and
s.next in OCTDIGITS):
this = this + sget()
this += sget()
isoctal = True
literal(makechar(int(this[1:], 8) & 0xff))
lappend(chr(int(this[1:], 8) & 0xff))
if not isoctal:
a((MARK, int(this[1:])))
addgroup(int(this[1:]))
else:
try:
this = makechar(ESCAPES[this][1])
this = chr(ESCAPES[this][1])
except KeyError:
pass
literal(this)
lappend(this)
else:
literal(this)
# convert template to groups and literals lists
i = 0
groups = []
groupsappend = groups.append
literals = [None] * len(p)
if isinstance(source, str):
encode = lambda x: x
else:
lappend(this)
if literal:
literals.append(''.join(literal))
if not isinstance(source, str):
# The tokenizer implicitly decodes bytes objects as latin-1, we must
# therefore re-encode the final representation.
encode = lambda x: x.encode('latin-1')
for c, s in p:
if c is MARK:
groupsappend((i, s))
# literal[i] is already None
else:
literals[i] = encode(s)
i = i + 1
literals = [None if s is None else s.encode('latin-1') for s in literals]
return groups, literals
def expand_template(template, match):

View file

@ -19,6 +19,9 @@ Core and Builtins
Library
-------
- Issue #19365: Optimized the parsing of long replacement string in re.sub*()
functions.
- Issue #19352: Fix unittest discovery when a module can be reached
through several paths (e.g. under Debian/Ubuntu with virtualenv).