gh-84489: Properly handle trailing spaces in Py_BuildValue() format strings (GH-21158)

The docs state that the space, tab, colon, and comma characters are
ignored in Py_BuildValue() format strings.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Zackery Spytz 2023-10-11 04:44:06 -07:00 committed by GitHub
parent f27b830907
commit f83fa0b9eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 35 deletions

View file

@ -319,6 +319,7 @@ def test_buildvalue(self):
self.assertEqual(buildvalue('((OO))', 1, 2), ((1, 2),))
self.assertEqual(buildvalue(' \t,:'), None)
self.assertEqual(buildvalue('O,', 1), 1)
self.assertEqual(buildvalue(' O ', 1), 1)
self.assertEqual(buildvalue('\tO\t', 1), 1)
self.assertEqual(buildvalue('O,O', 1, 2), (1, 2))
@ -327,17 +328,18 @@ def test_buildvalue(self):
self.assertEqual(buildvalue('O O', 1, 2), (1, 2))
self.assertEqual(buildvalue('O\tO', 1, 2), (1, 2))
self.assertEqual(buildvalue('(O,O)', 1, 2), (1, 2))
self.assertEqual(buildvalue('(O, O)', 1, 2), (1, 2))
self.assertEqual(buildvalue(' ( O O) ', 1, 2), (1, 2))
self.assertEqual(buildvalue('\t(\tO\tO)\t', 1, 2), (1, 2))
self.assertEqual(buildvalue('(O, O,)', 1, 2), (1, 2))
self.assertEqual(buildvalue(' ( O O ) ', 1, 2), (1, 2))
self.assertEqual(buildvalue('\t(\tO\tO\t)\t', 1, 2), (1, 2))
self.assertEqual(buildvalue('[O,O]', 1, 2), [1, 2])
self.assertEqual(buildvalue('[O, O]', 1, 2), [1, 2])
self.assertEqual(buildvalue(' [ O O] ', 1, 2), [1, 2])
self.assertEqual(buildvalue('[O, O,]', 1, 2), [1, 2])
self.assertEqual(buildvalue(' [ O O ] ', 1, 2), [1, 2])
self.assertEqual(buildvalue(' [\tO\tO\t] ', 1, 2), [1, 2])
self.assertEqual(buildvalue('{O:O}', 1, 2), {1: 2})
self.assertEqual(buildvalue('{O:O,O:O}', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue('{O: O, O: O}', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue(' { O O O O} ', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue('\t{\tO\tO\tO\tO}\t', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue('{O: O, O: O,}', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue(' { O O O O } ', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertEqual(buildvalue('\t{\tO\tO\tO\tO\t}\t', 1, 2, 3, 4), {1: 2, 3: 4})
self.assertRaises(SystemError, buildvalue, 'O', NULL)
self.assertRaises(SystemError, buildvalue, '(O)', NULL)
@ -378,6 +380,12 @@ def test_buildvalue_ints(self):
self.assertEqual(buildvalue('C', sys.maxunicode), chr(sys.maxunicode))
self.assertRaises(ValueError, buildvalue, 'C', -1)
self.assertRaises(ValueError, buildvalue, 'C', sys.maxunicode+1)
# gh-84489
self.assertRaises(ValueError, buildvalue, '(C )i', -1, 2)
self.assertRaises(ValueError, buildvalue, '[C ]i', -1, 2)
self.assertRaises(ValueError, buildvalue, '{Ci }i', -1, 2, 3)
def test_buildvalue_N(self):
_testcapi.test_buildvalue_N()

View file

@ -0,0 +1 @@
Properly handle trailing spaces before a closing parenthesis in :c:func:`Py_BuildValue` format strings.

View file

@ -88,6 +88,24 @@ static PyObject *do_mklist(const char**, va_list *, char, Py_ssize_t);
static PyObject *do_mkdict(const char**, va_list *, char, Py_ssize_t);
static PyObject *do_mkvalue(const char**, va_list *);
/* Skip ignorable separator characters and consume the closing delimiter.
 *
 * Starting at *p_format, space, tab, comma and colon characters are
 * stepped over until `endchar` is reached.  Any other character found
 * first (including the terminating NUL when `endchar` is not NUL) sets
 * SystemError and returns 0, leaving *p_format untouched.
 *
 * On success *p_format is moved just past `endchar` -- or left pointing
 * at the NUL terminator when `endchar` is '\0' -- and 1 is returned.
 */
static int
check_end(const char **p_format, char endchar)
{
    const char *cursor = *p_format;

    for (; *cursor != endchar; cursor++) {
        switch (*cursor) {
        case ' ':
        case '\t':
        case ',':
        case ':':
            /* Separator: keep scanning (the for-increment still runs). */
            continue;
        default:
            PyErr_SetString(PyExc_SystemError,
                            "Unmatched paren in format");
            return 0;
        }
    }

    /* A NUL endchar means "end of string": do not step past the terminator. */
    *p_format = (endchar != '\0') ? cursor + 1 : cursor;
    return 1;
}
static void
do_ignore(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n)
@ -108,14 +126,9 @@ do_ignore(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n)
}
}
Py_XDECREF(v);
if (**p_format != endchar) {
PyErr_SetString(PyExc_SystemError,
"Unmatched paren in format");
if (!check_end(p_format, endchar)) {
return;
}
if (endchar) {
++*p_format;
}
}
static PyObject *
@ -157,14 +170,10 @@ do_mkdict(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n)
Py_DECREF(k);
Py_DECREF(v);
}
if (**p_format != endchar) {
if (!check_end(p_format, endchar)) {
Py_DECREF(d);
PyErr_SetString(PyExc_SystemError,
"Unmatched paren in format");
return NULL;
}
if (endchar)
++*p_format;
return d;
}
@ -191,14 +200,10 @@ do_mklist(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n)
}
PyList_SET_ITEM(v, i, w);
}
if (**p_format != endchar) {
if (!check_end(p_format, endchar)) {
Py_DECREF(v);
PyErr_SetString(PyExc_SystemError,
"Unmatched paren in format");
return NULL;
}
if (endchar)
++*p_format;
return v;
}
@ -221,14 +226,9 @@ do_mkstack(PyObject **stack, const char **p_format, va_list *p_va,
}
stack[i] = w;
}
if (**p_format != endchar) {
PyErr_SetString(PyExc_SystemError,
"Unmatched paren in format");
if (!check_end(p_format, endchar)) {
goto error;
}
if (endchar) {
++*p_format;
}
return 0;
error:
@ -261,14 +261,10 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n)
}
PyTuple_SET_ITEM(v, i, w);
}
if (**p_format != endchar) {
if (!check_end(p_format, endchar)) {
Py_DECREF(v);
PyErr_SetString(PyExc_SystemError,
"Unmatched paren in format");
return NULL;
}
if (endchar)
++*p_format;
return v;
}