cpython/Lib/test/test_re.py
Barry Warsaw 408b6d34de Complete the absolute import patch for the test suite. All relative
imports of test modules now import from the test package.  Other
related oddities are also fixed (like DeprecationWarning filters that
weren't specifying the full import path, etc.).  Also did a general
code cleanup to remove all "from test.test_support import *"'s.  Other
from...import *'s weren't changed.
2002-07-30 23:27:12 +00:00

393 lines
12 KiB
Python

import sys
sys.path = ['.'] + sys.path
from test.test_support import verify, verbose, TestFailed
import re
import sys, os, traceback
# Misc tests from Tim Peters' re.doc
if verbose:
print 'Running tests on re.search and re.match'
try:
verify(re.search('x*', 'axx').span(0) == (0, 0))
verify(re.search('x*', 'axx').span() == (0, 0))
verify(re.search('x+', 'axx').span(0) == (1, 3))
verify(re.search('x+', 'axx').span() == (1, 3))
verify(re.search('x', 'aaa') is None)
except:
raise TestFailed, "re.search"
try:
verify(re.match('a*', 'xxx').span(0) == (0, 0))
verify(re.match('a*', 'xxx').span() == (0, 0))
verify(re.match('x*', 'xxxa').span(0) == (0, 3))
verify(re.match('x*', 'xxxa').span() == (0, 3))
verify(re.match('a+', 'xxx') is None)
except:
raise TestFailed, "re.search"
if verbose:
print 'Running tests on re.sub'
try:
verify(re.sub("(?i)b+", "x", "bbbb BBBB") == 'x x')
def bump_num(matchobj):
int_value = int(matchobj.group(0))
return str(int_value + 1)
verify(re.sub(r'\d+', bump_num, '08.2 -2 23x99y') == '9.3 -3 24x100y')
verify(re.sub(r'\d+', bump_num, '08.2 -2 23x99y', 3) == '9.3 -3 23x99y')
verify(re.sub('.', lambda m: r"\n", 'x') == '\\n')
verify(re.sub('.', r"\n", 'x') == '\n')
s = r"\1\1"
verify(re.sub('(.)', s, 'x') == 'xx')
verify(re.sub('(.)', re.escape(s), 'x') == s)
verify(re.sub('(.)', lambda m: s, 'x') == s)
verify(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx') == 'xxxx')
verify(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx') == 'xxxx')
verify(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx') == 'xxxx')
verify(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx') == 'xxxx')
verify(re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a') == '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
verify(re.sub('a', '\t\n\v\r\f\a', 'a') == '\t\n\v\r\f\a')
verify(re.sub('a', '\t\n\v\r\f\a', 'a') == (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
verify(re.sub('^\s*', 'X', 'test') == 'Xtest')
# Test for sub() on escaped characters, see SF bug #449000
verify(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n') == 'abc\ndef\n')
verify(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n') == 'abc\ndef\n')
verify(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n') == 'abc\ndef\n')
verify(re.sub('\r\n', '\n', 'abc\r\ndef\r\n') == 'abc\ndef\n')
except AssertionError:
raise TestFailed, "re.sub"
try:
verify(re.sub('a', 'b', 'aaaaa') == 'bbbbb')
verify(re.sub('a', 'b', 'aaaaa', 1) == 'baaaa')
except AssertionError:
raise TestFailed, "qualified re.sub"
if verbose:
print 'Running tests on symbolic references'
try:
re.sub('(?P<a>x)', '\g<a', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)', '\g<', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)', '\g', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)', '\g<a a>', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)', '\g<1a1>', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)', '\g<ab>', 'xx')
except IndexError, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
try:
re.sub('(?P<a>x)|(?P<b>y)', '\\2', 'xx')
except re.error, reason:
pass
else:
raise TestFailed, "symbolic reference"
if verbose:
print 'Running tests on re.subn'
try:
verify(re.subn("(?i)b+", "x", "bbbb BBBB") == ('x x', 2))
verify(re.subn("b+", "x", "bbbb BBBB") == ('x BBBB', 1))
verify(re.subn("b+", "x", "xyz") == ('xyz', 0))
verify(re.subn("b*", "x", "xyz") == ('xxxyxzx', 4))
verify(re.subn("b*", "x", "xyz", 2) == ('xxxyz', 2))
except AssertionError:
raise TestFailed, "re.subn"
if verbose:
print 'Running tests on re.split'
try:
verify(re.split(":", ":a:b::c") == ['', 'a', 'b', '', 'c'])
verify(re.split(":*", ":a:b::c") == ['', 'a', 'b', 'c'])
verify(re.split("(:*)", ":a:b::c") == ['', ':', 'a', ':', 'b', '::', 'c'])
verify(re.split("(?::*)", ":a:b::c") == ['', 'a', 'b', 'c'])
verify(re.split("(:)*", ":a:b::c") == ['', ':', 'a', ':', 'b', ':', 'c'])
verify(re.split("([b:]+)", ":a:b::c") == ['', ':', 'a', ':b::', 'c'])
verify(re.split("(b)|(:+)", ":a:b::c") == \
['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c'] )
verify(re.split("(?:b)|(?::+)", ":a:b::c") == ['', 'a', '', '', 'c'])
except AssertionError:
raise TestFailed, "re.split"
try:
verify(re.split(":", ":a:b::c", 2) == ['', 'a', 'b::c'])
verify(re.split(':', 'a:b:c:d', 2) == ['a', 'b', 'c:d'])
verify(re.split("(:)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c'])
verify(re.split("(:*)", ":a:b::c", 2) == ['', ':', 'a', ':', 'b::c'])
except AssertionError:
raise TestFailed, "qualified re.split"
if verbose:
print "Running tests on re.findall"
try:
verify(re.findall(":+", "abc") == [])
verify(re.findall(":+", "a:b::c:::d") == [":", "::", ":::"])
verify(re.findall("(:+)", "a:b::c:::d") == [":", "::", ":::"])
verify(re.findall("(:)(:*)", "a:b::c:::d") == [(":", ""),
(":", ":"),
(":", "::")] )
except AssertionError:
raise TestFailed, "re.findall"
if verbose:
print "Running tests on re.match"
try:
# No groups at all
m = re.match('a', 'a') ; verify(m.groups() == ())
# A single group
m = re.match('(a)', 'a') ; verify(m.groups() == ('a',))
pat = re.compile('((a)|(b))(c)?')
verify(pat.match('a').groups() == ('a', 'a', None, None))
verify(pat.match('b').groups() == ('b', None, 'b', None))
verify(pat.match('ac').groups() == ('a', 'a', None, 'c'))
verify(pat.match('bc').groups() == ('b', None, 'b', 'c'))
verify(pat.match('bc').groups("") == ('b', "", 'b', 'c'))
except AssertionError:
raise TestFailed, "match .groups() method"
try:
# A single group
m = re.match('(a)', 'a')
verify(m.group(0) == 'a')
verify(m.group(0) == 'a')
verify(m.group(1) == 'a')
verify(m.group(1, 1) == ('a', 'a'))
pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
verify(pat.match('a').group(1, 2, 3) == ('a', None, None))
verify(pat.match('b').group('a1', 'b2', 'c3') == (None, 'b', None))
verify(pat.match('ac').group(1, 'b2', 3) == ('a', None, 'c'))
except AssertionError:
raise TestFailed, "match .group() method"
if verbose:
print "Running tests on re.escape"
try:
p=""
for i in range(0, 256):
p = p + chr(i)
verify(re.match(re.escape(chr(i)), chr(i)) is not None)
verify(re.match(re.escape(chr(i)), chr(i)).span() == (0,1))
pat=re.compile( re.escape(p) )
verify(pat.match(p) is not None)
verify(pat.match(p).span() == (0,256))
except AssertionError:
raise TestFailed, "re.escape"
if verbose:
print 'Pickling a RegexObject instance'
import pickle
pat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
s = pickle.dumps(pat)
pat = pickle.loads(s)
try:
verify(re.I == re.IGNORECASE)
verify(re.L == re.LOCALE)
verify(re.M == re.MULTILINE)
verify(re.S == re.DOTALL)
verify(re.X == re.VERBOSE)
except AssertionError:
raise TestFailed, 're module constants'
for flags in [re.I, re.M, re.X, re.S, re.L]:
try:
r = re.compile('^pattern$', flags)
except:
print 'Exception raised on flag', flags
if verbose:
print 'Test engine limitations'
# Try nasty case that overflows the straightforward recursive
# implementation of repeated groups.
try:
verify(re.match('(x)*', 50000*'x').span() == (0, 50000))
except RuntimeError, v:
print v
from test.re_tests import *
if verbose:
print 'Running re_tests test suite'
else:
# To save time, only run the first and last 10 tests
#tests = tests[:10] + tests[-10:]
pass
for t in tests:
sys.stdout.flush()
pattern = s = outcome = repl = expected = None
if len(t) == 5:
pattern, s, outcome, repl, expected = t
elif len(t) == 3:
pattern, s, outcome = t
else:
raise ValueError, ('Test tuples should have 3 or 5 fields', t)
try:
obj = re.compile(pattern)
except re.error:
if outcome == SYNTAX_ERROR: pass # Expected a syntax error
else:
print '=== Syntax error:', t
except KeyboardInterrupt: raise KeyboardInterrupt
except:
print '*** Unexpected error ***', t
if verbose:
traceback.print_exc(file=sys.stdout)
else:
try:
result = obj.search(s)
except re.error, msg:
print '=== Unexpected exception', t, repr(msg)
if outcome == SYNTAX_ERROR:
# This should have been a syntax error; forget it.
pass
elif outcome == FAIL:
if result is None: pass # No match, as expected
else: print '=== Succeeded incorrectly', t
elif outcome == SUCCEED:
if result is not None:
# Matched, as expected, so now we compute the
# result string and compare it to our expected result.
start, end = result.span(0)
vardict={'found': result.group(0),
'groups': result.group(),
'flags': result.re.flags}
for i in range(1, 100):
try:
gi = result.group(i)
# Special hack because else the string concat fails:
if gi is None:
gi = "None"
except IndexError:
gi = "Error"
vardict['g%d' % i] = gi
for i in result.re.groupindex.keys():
try:
gi = result.group(i)
if gi is None:
gi = "None"
except IndexError:
gi = "Error"
vardict[i] = gi
repl = eval(repl, vardict)
if repl != expected:
print '=== grouping error', t,
print repr(repl) + ' should be ' + repr(expected)
else:
print '=== Failed incorrectly', t
# Try the match on a unicode string, and check that it
# still succeeds.
try:
result = obj.search(unicode(s, "latin-1"))
if result is None:
print '=== Fails on unicode match', t
except NameError:
continue # 1.5.2
except TypeError:
continue # unicode test case
# Try the match on a unicode pattern, and check that it
# still succeeds.
obj=re.compile(unicode(pattern, "latin-1"))
result = obj.search(s)
if result is None:
print '=== Fails on unicode pattern match', t
# Try the match with the search area limited to the extent
# of the match and see if it still succeeds. \B will
# break (because it won't match at the end or start of a
# string), so we'll ignore patterns that feature it.
if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
and result is not None:
obj = re.compile(pattern)
result = obj.search(s, result.start(0), result.end(0) + 1)
if result is None:
print '=== Failed on range-limited match', t
# Try the match with IGNORECASE enabled, and check that it
# still succeeds.
obj = re.compile(pattern, re.IGNORECASE)
result = obj.search(s)
if result is None:
print '=== Fails on case-insensitive match', t
# Try the match with LOCALE enabled, and check that it
# still succeeds.
obj = re.compile(pattern, re.LOCALE)
result = obj.search(s)
if result is None:
print '=== Fails on locale-sensitive match', t
# Try the match with UNICODE locale enabled, and check
# that it still succeeds.
obj = re.compile(pattern, re.UNICODE)
result = obj.search(s)
if result is None:
print '=== Fails on unicode-sensitive match', t