gh-91404: Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or allocation failure (GH-32283)" (#93882)

Revert "bpo-23689: re module, fix memory leak when a match is terminated by a signal or memory allocation failure (GH-32283)"

This reverts commit 6e3eee5c11.

Manual fixups to increase the MAGIC number and to handle conflicts with
a couple of changes that landed after the reverted commit.

Thanks for reviews by Ma Lin and Serhiy Storchaka.
This commit is contained in:
Gregory P. Smith 2022-06-17 01:19:44 -07:00 committed by GitHub
parent 538f28921f
commit 4beee0c7b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 74 additions and 146 deletions

View file

@ -28,21 +28,14 @@
POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE), POSSESSIVE_REPEAT: (POSSESSIVE_REPEAT, SUCCESS, POSSESSIVE_REPEAT_ONE),
} }
class _CompileData:
__slots__ = ('code', 'repeat_count')
def __init__(self):
self.code = []
self.repeat_count = 0
def _combine_flags(flags, add_flags, del_flags, def _combine_flags(flags, add_flags, del_flags,
TYPE_FLAGS=_parser.TYPE_FLAGS): TYPE_FLAGS=_parser.TYPE_FLAGS):
if add_flags & TYPE_FLAGS: if add_flags & TYPE_FLAGS:
flags &= ~TYPE_FLAGS flags &= ~TYPE_FLAGS
return (flags | add_flags) & ~del_flags return (flags | add_flags) & ~del_flags
def _compile(data, pattern, flags): def _compile(code, pattern, flags):
# internal: compile a (sub)pattern # internal: compile a (sub)pattern
code = data.code
emit = code.append emit = code.append
_len = len _len = len
LITERAL_CODES = _LITERAL_CODES LITERAL_CODES = _LITERAL_CODES
@ -115,7 +108,7 @@ def _compile(data, pattern, flags):
skip = _len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
_compile(data, av[2], flags) _compile(code, av[2], flags)
emit(SUCCESS) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
else: else:
@ -123,11 +116,7 @@ def _compile(data, pattern, flags):
skip = _len(code); emit(0) skip = _len(code); emit(0)
emit(av[0]) emit(av[0])
emit(av[1]) emit(av[1])
# now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT) _compile(code, av[2], flags)
if op != POSSESSIVE_REPEAT:
emit(data.repeat_count)
data.repeat_count += 1
_compile(data, av[2], flags)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
emit(REPEATING_CODES[op][1]) emit(REPEATING_CODES[op][1])
elif op is SUBPATTERN: elif op is SUBPATTERN:
@ -136,7 +125,7 @@ def _compile(data, pattern, flags):
emit(MARK) emit(MARK)
emit((group-1)*2) emit((group-1)*2)
# _compile_info(code, p, _combine_flags(flags, add_flags, del_flags)) # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
_compile(data, p, _combine_flags(flags, add_flags, del_flags)) _compile(code, p, _combine_flags(flags, add_flags, del_flags))
if group: if group:
emit(MARK) emit(MARK)
emit((group-1)*2+1) emit((group-1)*2+1)
@ -148,7 +137,7 @@ def _compile(data, pattern, flags):
# pop their stack if they reach it # pop their stack if they reach it
emit(ATOMIC_GROUP) emit(ATOMIC_GROUP)
skip = _len(code); emit(0) skip = _len(code); emit(0)
_compile(data, av, flags) _compile(code, av, flags)
emit(SUCCESS) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
elif op in SUCCESS_CODES: elif op in SUCCESS_CODES:
@ -163,7 +152,7 @@ def _compile(data, pattern, flags):
if lo != hi: if lo != hi:
raise error("look-behind requires fixed-width pattern") raise error("look-behind requires fixed-width pattern")
emit(lo) # look behind emit(lo) # look behind
_compile(data, av[1], flags) _compile(code, av[1], flags)
emit(SUCCESS) emit(SUCCESS)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
elif op is AT: elif op is AT:
@ -182,7 +171,7 @@ def _compile(data, pattern, flags):
for av in av[1]: for av in av[1]:
skip = _len(code); emit(0) skip = _len(code); emit(0)
# _compile_info(code, av, flags) # _compile_info(code, av, flags)
_compile(data, av, flags) _compile(code, av, flags)
emit(JUMP) emit(JUMP)
tailappend(_len(code)); emit(0) tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip code[skip] = _len(code) - skip
@ -210,12 +199,12 @@ def _compile(data, pattern, flags):
emit(op) emit(op)
emit(av[0]-1) emit(av[0]-1)
skipyes = _len(code); emit(0) skipyes = _len(code); emit(0)
_compile(data, av[1], flags) _compile(code, av[1], flags)
if av[2]: if av[2]:
emit(JUMP) emit(JUMP)
skipno = _len(code); emit(0) skipno = _len(code); emit(0)
code[skipyes] = _len(code) - skipyes + 1 code[skipyes] = _len(code) - skipyes + 1
_compile(data, av[2], flags) _compile(code, av[2], flags)
code[skipno] = _len(code) - skipno code[skipno] = _len(code) - skipno
else: else:
code[skipyes] = _len(code) - skipyes + 1 code[skipyes] = _len(code) - skipyes + 1
@ -582,17 +571,17 @@ def isstring(obj):
def _code(p, flags): def _code(p, flags):
flags = p.state.flags | flags flags = p.state.flags | flags
data = _CompileData() code = []
# compile info block # compile info block
_compile_info(data.code, p, flags) _compile_info(code, p, flags)
# compile the pattern # compile the pattern
_compile(data, p.data, flags) _compile(code, p.data, flags)
data.code.append(SUCCESS) code.append(SUCCESS)
return data return code
def _hex_code(code): def _hex_code(code):
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code) return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
@ -693,7 +682,7 @@ def print_2(*args):
else: else:
print_(FAILURE) print_(FAILURE)
i += 1 i += 1
elif op in (REPEAT_ONE, MIN_REPEAT_ONE, elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE,
POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE): POSSESSIVE_REPEAT, POSSESSIVE_REPEAT_ONE):
skip, min, max = code[i: i+3] skip, min, max = code[i: i+3]
if max == MAXREPEAT: if max == MAXREPEAT:
@ -701,13 +690,6 @@ def print_2(*args):
print_(op, skip, min, max, to=i+skip) print_(op, skip, min, max, to=i+skip)
dis_(i+3, i+skip) dis_(i+3, i+skip)
i += skip i += skip
elif op is REPEAT:
skip, min, max, repeat_index = code[i: i+4]
if max == MAXREPEAT:
max = 'MAXREPEAT'
print_(op, skip, min, max, repeat_index, to=i+skip)
dis_(i+4, i+skip)
i += skip
elif op is GROUPREF_EXISTS: elif op is GROUPREF_EXISTS:
arg, skip = code[i: i+2] arg, skip = code[i: i+2]
print_(op, arg, skip, to=i+skip) print_(op, arg, skip, to=i+skip)
@ -762,11 +744,11 @@ def compile(p, flags=0):
else: else:
pattern = None pattern = None
data = _code(p, flags) code = _code(p, flags)
if flags & SRE_FLAG_DEBUG: if flags & SRE_FLAG_DEBUG:
print() print()
dis(data.code) dis(code)
# map in either direction # map in either direction
groupindex = p.state.groupdict groupindex = p.state.groupdict
@ -775,6 +757,7 @@ def compile(p, flags=0):
indexgroup[i] = k indexgroup[i] = k
return _sre.compile( return _sre.compile(
pattern, flags | p.state.flags, data.code, pattern, flags | p.state.flags, code,
p.state.groups-1, groupindex, tuple(indexgroup), p.state.groups-1,
data.repeat_count) groupindex, tuple(indexgroup)
)

View file

@ -13,7 +13,7 @@
# update when constants are added or removed # update when constants are added or removed
MAGIC = 20220423 MAGIC = 20220615
from _sre import MAXREPEAT, MAXGROUPS from _sre import MAXREPEAT, MAXGROUPS

View file

@ -1765,12 +1765,9 @@ def test_dealloc(self):
long_overflow = 2**128 long_overflow = 2**128
self.assertRaises(TypeError, re.finditer, "a", {}) self.assertRaises(TypeError, re.finditer, "a", {})
with self.assertRaises(OverflowError): with self.assertRaises(OverflowError):
_sre.compile("abc", 0, [long_overflow], 0, {}, (), 0) _sre.compile("abc", 0, [long_overflow], 0, {}, ())
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
_sre.compile({}, 0, [], 0, [], [], 0) _sre.compile({}, 0, [], 0, [], [])
with self.assertRaises(RuntimeError):
# invalid repeat_count -1
_sre.compile("abc", 0, [1], 0, {}, (), -1)
def test_search_dot_unicode(self): def test_search_dot_unicode(self):
self.assertTrue(re.search("123.*-", '123abc-')) self.assertTrue(re.search("123.*-", '123abc-'))
@ -2509,27 +2506,6 @@ def test_possesive_repeat(self):
14. SUCCESS 14. SUCCESS
''') ''')
def test_repeat_index(self):
self.assertEqual(get_debug_out(r'(?:ab)*?(?:cd)*'), '''\
MIN_REPEAT 0 MAXREPEAT
LITERAL 97
LITERAL 98
MAX_REPEAT 0 MAXREPEAT
LITERAL 99
LITERAL 100
0. INFO 4 0b0 0 MAXREPEAT (to 5)
5: REPEAT 8 0 MAXREPEAT 0 (to 14)
10. LITERAL 0x61 ('a')
12. LITERAL 0x62 ('b')
14: MIN_UNTIL
15. REPEAT 8 0 MAXREPEAT 1 (to 24)
20. LITERAL 0x63 ('c')
22. LITERAL 0x64 ('d')
24: MAX_UNTIL
25. SUCCESS
''')
class PatternReprTests(unittest.TestCase): class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected): def check(self, pattern, expected):

View file

@ -0,0 +1,3 @@
Revert the fix for the :mod:`re` memory leak that occurs when a match is
terminated by a signal or memory allocation failure, as the implemented fix
caused a major performance regression.

View file

@ -764,7 +764,7 @@ PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__,
PyDoc_STRVAR(_sre_compile__doc__, PyDoc_STRVAR(_sre_compile__doc__,
"compile($module, /, pattern, flags, code, groups, groupindex,\n" "compile($module, /, pattern, flags, code, groups, groupindex,\n"
" indexgroup, repeat_count)\n" " indexgroup)\n"
"--\n" "--\n"
"\n"); "\n");
@ -774,24 +774,23 @@ PyDoc_STRVAR(_sre_compile__doc__,
static PyObject * static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
PyObject *code, Py_ssize_t groups, PyObject *groupindex, PyObject *code, Py_ssize_t groups, PyObject *groupindex,
PyObject *indexgroup, Py_ssize_t repeat_count); PyObject *indexgroup);
static PyObject * static PyObject *
_sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) _sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{ {
PyObject *return_value = NULL; PyObject *return_value = NULL;
static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", "repeat_count", NULL}; static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "compile", 0}; static _PyArg_Parser _parser = {NULL, _keywords, "compile", 0};
PyObject *argsbuf[7]; PyObject *argsbuf[6];
PyObject *pattern; PyObject *pattern;
int flags; int flags;
PyObject *code; PyObject *code;
Py_ssize_t groups; Py_ssize_t groups;
PyObject *groupindex; PyObject *groupindex;
PyObject *indexgroup; PyObject *indexgroup;
Py_ssize_t repeat_count;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 7, 7, 0, argsbuf); args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 6, 6, 0, argsbuf);
if (!args) { if (!args) {
goto exit; goto exit;
} }
@ -827,19 +826,7 @@ _sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject
goto exit; goto exit;
} }
indexgroup = args[5]; indexgroup = args[5];
{ return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup);
Py_ssize_t ival = -1;
PyObject *iobj = _PyNumber_Index(args[6]);
if (iobj != NULL) {
ival = PyLong_AsSsize_t(iobj);
Py_DECREF(iobj);
}
if (ival == -1 && PyErr_Occurred()) {
goto exit;
}
repeat_count = ival;
}
return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup, repeat_count);
exit: exit:
return return_value; return return_value;
@ -1129,4 +1116,4 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const
} }
return _sre_SRE_Scanner_search_impl(self, cls); return _sre_SRE_Scanner_search_impl(self, cls);
} }
/*[clinic end generated code: output=97e7ce058366760b input=a9049054013a1b77]*/ /*[clinic end generated code: output=fd2f45c941620e6e input=a9049054013a1b77]*/

View file

@ -427,12 +427,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->lastmark = -1; state->lastmark = -1;
state->lastindex = -1; state->lastindex = -1;
state->repeats_array = PyMem_New(SRE_REPEAT, pattern->repeat_count);
if (!state->repeats_array) {
PyErr_NoMemory();
goto err;
}
state->buffer.buf = NULL; state->buffer.buf = NULL;
ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer);
if (!ptr) if (!ptr)
@ -482,9 +476,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
safely casted to `void*`, see bpo-39943 for details. */ safely casted to `void*`, see bpo-39943 for details. */
PyMem_Free((void*) state->mark); PyMem_Free((void*) state->mark);
state->mark = NULL; state->mark = NULL;
PyMem_Free(state->repeats_array);
state->repeats_array = NULL;
if (state->buffer.buf) if (state->buffer.buf)
PyBuffer_Release(&state->buffer); PyBuffer_Release(&state->buffer);
return NULL; return NULL;
@ -500,8 +491,6 @@ state_fini(SRE_STATE* state)
/* See above PyMem_Del for why we explicitly cast here. */ /* See above PyMem_Del for why we explicitly cast here. */
PyMem_Free((void*) state->mark); PyMem_Free((void*) state->mark);
state->mark = NULL; state->mark = NULL;
PyMem_Free(state->repeats_array);
state->repeats_array = NULL;
} }
/* calculate offset from start of string */ /* calculate offset from start of string */
@ -1408,15 +1397,14 @@ _sre.compile
groups: Py_ssize_t groups: Py_ssize_t
groupindex: object(subclass_of='&PyDict_Type') groupindex: object(subclass_of='&PyDict_Type')
indexgroup: object(subclass_of='&PyTuple_Type') indexgroup: object(subclass_of='&PyTuple_Type')
repeat_count: Py_ssize_t
[clinic start generated code]*/ [clinic start generated code]*/
static PyObject * static PyObject *
_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
PyObject *code, Py_ssize_t groups, PyObject *groupindex, PyObject *code, Py_ssize_t groups, PyObject *groupindex,
PyObject *indexgroup, Py_ssize_t repeat_count) PyObject *indexgroup)
/*[clinic end generated code: output=922af562d51b1657 input=77e39c322501ec2a]*/ /*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/
{ {
/* "compile" pattern descriptor to pattern object */ /* "compile" pattern descriptor to pattern object */
@ -1474,8 +1462,8 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->pattern = pattern; self->pattern = pattern;
self->flags = flags; self->flags = flags;
self->groups = groups; self->groups = groups;
self->repeat_count = repeat_count;
if (PyDict_GET_SIZE(groupindex) > 0) { if (PyDict_GET_SIZE(groupindex) > 0) {
Py_INCREF(groupindex); Py_INCREF(groupindex);
@ -1647,7 +1635,7 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
} }
static int static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self) _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{ {
/* Some variables are manipulated by the macros above */ /* Some variables are manipulated by the macros above */
SRE_CODE op; SRE_CODE op;
@ -1668,8 +1656,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
sre_match() code is robust even if they don't, and the worst sre_match() code is robust even if they don't, and the worst
you can get is nonsensical match results. */ you can get is nonsensical match results. */
GET_ARG; GET_ARG;
if (arg > 2 * (size_t)self->groups + 1) { if (arg > 2 * (size_t)groups + 1) {
VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)self->groups)); VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
FAIL; FAIL;
} }
break; break;
@ -1798,7 +1786,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
if (skip == 0) if (skip == 0)
break; break;
/* Stop 2 before the end; we check the JUMP below */ /* Stop 2 before the end; we check the JUMP below */
if (!_validate_inner(code, code+skip-3, self)) if (!_validate_inner(code, code+skip-3, groups))
FAIL; FAIL;
code += skip-3; code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP /* Check that it ends with a JUMP, and that each JUMP
@ -1827,7 +1815,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
FAIL; FAIL;
if (max > SRE_MAXREPEAT) if (max > SRE_MAXREPEAT)
FAIL; FAIL;
if (!_validate_inner(code, code+skip-4, self)) if (!_validate_inner(code, code+skip-4, groups))
FAIL; FAIL;
code += skip-4; code += skip-4;
GET_OP; GET_OP;
@ -1839,7 +1827,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_REPEAT: case SRE_OP_REPEAT:
case SRE_OP_POSSESSIVE_REPEAT: case SRE_OP_POSSESSIVE_REPEAT:
{ {
SRE_CODE op1 = op, min, max, repeat_index; SRE_CODE op1 = op, min, max;
GET_SKIP; GET_SKIP;
GET_ARG; min = arg; GET_ARG; min = arg;
GET_ARG; max = arg; GET_ARG; max = arg;
@ -1847,17 +1835,9 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
FAIL; FAIL;
if (max > SRE_MAXREPEAT) if (max > SRE_MAXREPEAT)
FAIL; FAIL;
if (op1 == SRE_OP_REPEAT) { if (!_validate_inner(code, code+skip-3, groups))
GET_ARG; repeat_index = arg;
if (repeat_index >= (size_t)self->repeat_count)
FAIL;
skip -= 4;
} else {
skip -= 3;
}
if (!_validate_inner(code, code+skip, self))
FAIL; FAIL;
code += skip; code += skip-3;
GET_OP; GET_OP;
if (op1 == SRE_OP_POSSESSIVE_REPEAT) { if (op1 == SRE_OP_POSSESSIVE_REPEAT) {
if (op != SRE_OP_SUCCESS) if (op != SRE_OP_SUCCESS)
@ -1873,7 +1853,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_ATOMIC_GROUP: case SRE_OP_ATOMIC_GROUP:
{ {
GET_SKIP; GET_SKIP;
if (!_validate_inner(code, code+skip-2, self)) if (!_validate_inner(code, code+skip-2, groups))
FAIL; FAIL;
code += skip-2; code += skip-2;
GET_OP; GET_OP;
@ -1887,7 +1867,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
case SRE_OP_GROUPREF_UNI_IGNORE: case SRE_OP_GROUPREF_UNI_IGNORE:
case SRE_OP_GROUPREF_LOC_IGNORE: case SRE_OP_GROUPREF_LOC_IGNORE:
GET_ARG; GET_ARG;
if (arg >= (size_t)self->groups) if (arg >= (size_t)groups)
FAIL; FAIL;
break; break;
@ -1896,7 +1876,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
'group' is either an integer group number or a group name, 'group' is either an integer group number or a group name,
'then' and 'else' are sub-regexes, and 'else' is optional. */ 'then' and 'else' are sub-regexes, and 'else' is optional. */
GET_ARG; GET_ARG;
if (arg >= (size_t)self->groups) if (arg >= (size_t)groups)
FAIL; FAIL;
GET_SKIP_ADJ(1); GET_SKIP_ADJ(1);
code--; /* The skip is relative to the first arg! */ code--; /* The skip is relative to the first arg! */
@ -1929,17 +1909,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
code[skip-3] == SRE_OP_JUMP) code[skip-3] == SRE_OP_JUMP)
{ {
VTRACE(("both then and else parts present\n")); VTRACE(("both then and else parts present\n"));
if (!_validate_inner(code+1, code+skip-3, self)) if (!_validate_inner(code+1, code+skip-3, groups))
FAIL; FAIL;
code += skip-2; /* Position after JUMP, at <skipno> */ code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP; GET_SKIP;
if (!_validate_inner(code, code+skip-1, self)) if (!_validate_inner(code, code+skip-1, groups))
FAIL; FAIL;
code += skip-1; code += skip-1;
} }
else { else {
VTRACE(("only a then part present\n")); VTRACE(("only a then part present\n"));
if (!_validate_inner(code+1, code+skip-1, self)) if (!_validate_inner(code+1, code+skip-1, groups))
FAIL; FAIL;
code += skip-1; code += skip-1;
} }
@ -1953,7 +1933,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
if (arg & 0x80000000) if (arg & 0x80000000)
FAIL; /* Width too large */ FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */ /* Stop 1 before the end; we check the SUCCESS below */
if (!_validate_inner(code+1, code+skip-2, self)) if (!_validate_inner(code+1, code+skip-2, groups))
FAIL; FAIL;
code += skip-2; code += skip-2;
GET_OP; GET_OP;
@ -1972,19 +1952,18 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, PatternObject *self)
} }
static int static int
_validate_outer(SRE_CODE *code, SRE_CODE *end, PatternObject *self) _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{ {
if (self->groups < 0 || (size_t)self->groups > SRE_MAXGROUPS || if (groups < 0 || (size_t)groups > SRE_MAXGROUPS ||
self->repeat_count < 0 ||
code >= end || end[-1] != SRE_OP_SUCCESS) code >= end || end[-1] != SRE_OP_SUCCESS)
FAIL; FAIL;
return _validate_inner(code, end-1, self); return _validate_inner(code, end-1, groups);
} }
static int static int
_validate(PatternObject *self) _validate(PatternObject *self)
{ {
if (!_validate_outer(self->code, self->code+self->codesize, self)) if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
{ {
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0; return 0;

View file

@ -29,8 +29,6 @@ typedef struct {
Py_ssize_t groups; /* must be first! */ Py_ssize_t groups; /* must be first! */
PyObject* groupindex; /* dict */ PyObject* groupindex; /* dict */
PyObject* indexgroup; /* tuple */ PyObject* indexgroup; /* tuple */
/* the number of REPEATs */
Py_ssize_t repeat_count;
/* compatibility */ /* compatibility */
PyObject* pattern; /* pattern source (or None) */ PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */ int flags; /* flags used when compiling pattern source */
@ -85,8 +83,6 @@ typedef struct {
size_t data_stack_base; size_t data_stack_base;
/* current repeat context */ /* current repeat context */
SRE_REPEAT *repeat; SRE_REPEAT *repeat;
/* repeat contexts array */
SRE_REPEAT *repeats_array;
} SRE_STATE; } SRE_STATE;
typedef struct { typedef struct {

View file

@ -11,7 +11,7 @@
* See the sre.c file for information on usage and redistribution. * See the sre.c file for information on usage and redistribution.
*/ */
#define SRE_MAGIC 20220423 #define SRE_MAGIC 20220615
#define SRE_OP_FAILURE 0 #define SRE_OP_FAILURE 0
#define SRE_OP_SUCCESS 1 #define SRE_OP_SUCCESS 1
#define SRE_OP_ANY 2 #define SRE_OP_ANY 2

View file

@ -1079,12 +1079,17 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
/* <REPEAT> <skip> <1=min> <2=max> /* <REPEAT> <skip> <1=min> <2=max>
<3=repeat_index> item <UNTIL> tail */ <3=repeat_index> item <UNTIL> tail */
TRACE(("|%p|%p|REPEAT %d %d %d\n", pattern, ptr, TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr,
pattern[1], pattern[2], pattern[3])); pattern[1], pattern[2]));
/* install repeat context */
ctx->u.rep = &state->repeats_array[pattern[3]];
/* install new repeat context */
/* TODO(https://github.com/python/cpython/issues/67877): Fix this
* potential memory leak. */
ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep));
if (!ctx->u.rep) {
PyErr_NoMemory();
RETURN_FAILURE;
}
ctx->u.rep->count = -1; ctx->u.rep->count = -1;
ctx->u.rep->pattern = pattern; ctx->u.rep->pattern = pattern;
ctx->u.rep->prev = state->repeat; ctx->u.rep->prev = state->repeat;
@ -1094,6 +1099,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
state->ptr = ptr; state->ptr = ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
state->repeat = ctx->u.rep->prev; state->repeat = ctx->u.rep->prev;
PyObject_Free(ctx->u.rep);
if (ret) { if (ret) {
RETURN_ON_ERROR(ret); RETURN_ON_ERROR(ret);
@ -1103,8 +1109,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
TARGET(SRE_OP_MAX_UNTIL): TARGET(SRE_OP_MAX_UNTIL):
/* maximizing repeat */ /* maximizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
<3=repeat_index> item <MAX_UNTIL> tail */
/* FIXME: we probably need to deal with zero-width /* FIXME: we probably need to deal with zero-width
matches in here... */ matches in here... */
@ -1124,7 +1129,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* not enough matches */ /* not enough matches */
ctx->u.rep->count = ctx->count; ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
ctx->u.rep->pattern+4); ctx->u.rep->pattern+3);
if (ret) { if (ret) {
RETURN_ON_ERROR(ret); RETURN_ON_ERROR(ret);
RETURN_SUCCESS; RETURN_SUCCESS;
@ -1146,7 +1151,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
DATA_PUSH(&ctx->u.rep->last_ptr); DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr; ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
ctx->u.rep->pattern+4); ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr); DATA_POP(&ctx->u.rep->last_ptr);
if (ret) { if (ret) {
MARK_POP_DISCARD(ctx->lastmark); MARK_POP_DISCARD(ctx->lastmark);
@ -1171,8 +1176,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
TARGET(SRE_OP_MIN_UNTIL): TARGET(SRE_OP_MIN_UNTIL):
/* minimizing repeat */ /* minimizing repeat */
/* <REPEAT> <skip> <1=min> <2=max> /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
<3=repeat_index> item <MIN_UNTIL> tail */
ctx->u.rep = state->repeat; ctx->u.rep = state->repeat;
if (!ctx->u.rep) if (!ctx->u.rep)
@ -1189,7 +1193,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* not enough matches */ /* not enough matches */
ctx->u.rep->count = ctx->count; ctx->u.rep->count = ctx->count;
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
ctx->u.rep->pattern+4); ctx->u.rep->pattern+3);
if (ret) { if (ret) {
RETURN_ON_ERROR(ret); RETURN_ON_ERROR(ret);
RETURN_SUCCESS; RETURN_SUCCESS;
@ -1232,7 +1236,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
DATA_PUSH(&ctx->u.rep->last_ptr); DATA_PUSH(&ctx->u.rep->last_ptr);
ctx->u.rep->last_ptr = state->ptr; ctx->u.rep->last_ptr = state->ptr;
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
ctx->u.rep->pattern+4); ctx->u.rep->pattern+3);
DATA_POP(&ctx->u.rep->last_ptr); DATA_POP(&ctx->u.rep->last_ptr);
if (ret) { if (ret) {
RETURN_ON_ERROR(ret); RETURN_ON_ERROR(ret);