#16009: JSON error messages now provide more information. Patch by Serhiy Storchaka.

This commit is contained in:
Ezio Melotti 2013-01-03 08:44:15 +02:00
parent fd53a5a011
commit 37623ab5f1
5 changed files with 112 additions and 33 deletions

View file

@ -188,8 +188,8 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
try:
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
pairs_append((key, value))
try:
nextchar = s[end]
@ -232,8 +232,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
while True:
try:
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
@ -243,7 +243,7 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
if nextchar == ']':
break
elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end))
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
try:
if s[end] in _ws:
end += 1
@ -358,6 +358,6 @@ def raw_decode(self, s, idx=0):
"""
try:
obj, end = self.scan_once(s, idx)
except StopIteration:
raise ValueError("No JSON object could be decoded")
except StopIteration as err:
raise ValueError(errmsg("Expecting value", s, err.value)) from None
return obj, end

View file

@ -29,7 +29,7 @@ def _scan_once(string, idx):
try:
nextchar = string[idx]
except IndexError:
raise StopIteration
raise StopIteration(idx)
if nextchar == '"':
return parse_string(string, idx + 1, strict)
@ -60,7 +60,7 @@ def _scan_once(string, idx):
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
raise StopIteration(idx)
def scan_once(string, idx):
try:

View file

@ -1,4 +1,5 @@
from test.json_tests import PyTest, CTest
import re
# 2007-10-05
JSONDOCS = [
@ -100,6 +101,82 @@ def test_non_string_keys_dict(self):
#This is for python encoder
self.assertRaises(TypeError, self.dumps, data, indent=True)
def test_truncated_input(self):
test_cases = [
('', 'Expecting value', 0),
('[', 'Expecting value', 1),
('[42', "Expecting ',' delimiter", 3),
('[42,', 'Expecting value', 4),
('["', 'Unterminated string starting at', 1),
('["spam', 'Unterminated string starting at', 1),
('["spam"', "Expecting ',' delimiter", 7),
('["spam",', 'Expecting value', 8),
('{', 'Expecting property name enclosed in double quotes', 1),
('{"', 'Unterminated string starting at', 1),
('{"spam', 'Unterminated string starting at', 1),
('{"spam"', "Expecting ':' delimiter", 7),
('{"spam":', 'Expecting value', 8),
('{"spam":42', "Expecting ',' delimiter", 10),
('{"spam":42,', 'Expecting property name enclosed in double quotes', 11),
]
test_cases += [
('"', 'Unterminated string starting at', 0),
('"spam', 'Unterminated string starting at', 0),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {1}\)'.format(
re.escape(msg), idx),
self.loads, data)
def test_unexpected_data(self):
test_cases = [
('[,', 'Expecting value', 1),
('{"spam":[}', 'Expecting value', 9),
('[42:', "Expecting ',' delimiter", 3),
('[42 "spam"', "Expecting ',' delimiter", 4),
('[42,]', 'Expecting value', 4),
('{"spam":[42}', "Expecting ',' delimiter", 11),
('["]', 'Unterminated string starting at', 1),
('["spam":', "Expecting ',' delimiter", 7),
('["spam",]', 'Expecting value', 8),
('{:', 'Expecting property name enclosed in double quotes', 1),
('{,', 'Expecting property name enclosed in double quotes', 1),
('{42', 'Expecting property name enclosed in double quotes', 1),
('[{]', 'Expecting property name enclosed in double quotes', 2),
('{"spam",', "Expecting ':' delimiter", 7),
('{"spam"}', "Expecting ':' delimiter", 7),
('[{"spam"]', "Expecting ':' delimiter", 8),
('{"spam":}', 'Expecting value', 8),
('[{"spam":]', 'Expecting value', 9),
('{"spam":42 "ham"', "Expecting ',' delimiter", 11),
('[{"spam":42]', "Expecting ',' delimiter", 11),
('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} \(char {1}\)'.format(
re.escape(msg), idx),
self.loads, data)
def test_extra_data(self):
test_cases = [
('[]]', 'Extra data', 2),
('{}}', 'Extra data', 2),
('[],[]', 'Extra data', 2),
('{},{}', 'Extra data', 2),
]
test_cases += [
('42,"spam"', 'Extra data', 2),
('"spam",42', 'Extra data', 6),
]
for data, msg, idx in test_cases:
self.assertRaisesRegex(ValueError,
r'^{0}: line 1 column {1} - line 1 column {2}'
r' \(char {1} - {2}\)'.format(
re.escape(msg), idx, len(data)),
self.loads, data)
class TestPyFail(TestFail, PyTest): pass
class TestCFail(TestFail, CTest): pass

View file

@ -201,6 +201,8 @@ Core and Builtins
Library
-------
- Issue #16009: JSON error messages now provide more information.
- Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and
bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman.

View file

@ -237,6 +237,16 @@ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
}
}
static void
raise_stop_iteration(Py_ssize_t idx)
{
PyObject *value = PyLong_FromSsize_t(idx);
if (value != NULL) {
PyErr_SetObject(PyExc_StopIteration, value);
Py_DECREF(value);
}
}
static PyObject *
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
/* return (rval, idx) tuple, stealing reference to rval */
@ -306,7 +316,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
buf = PyUnicode_DATA(pystr);
kind = PyUnicode_KIND(pystr);
if (end < 0 || len <= end) {
if (end < 0 || len < end) {
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
goto bail;
}
@ -604,12 +614,12 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
/* only loop if the object is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
while (idx <= end_idx) {
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
while (1) {
PyObject *memokey;
/* read key */
if (PyUnicode_READ(kind, str, idx) != '"') {
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
goto bail;
}
@ -666,11 +676,9 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the object is closed or we didn't get the , delimiter */
if (idx > end_idx) break;
if (PyUnicode_READ(kind, str, idx) == '}') {
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
break;
}
else if (PyUnicode_READ(kind, str, idx) != ',') {
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail;
}
@ -681,12 +689,6 @@ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss
}
}
/* verify that idx < end_idx, str[idx] should be '}' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
raise_errmsg("Expecting object", pystr, end_idx);
goto bail;
}
*next_idx_ptr = idx + 1;
if (has_pairs_hook) {
@ -738,8 +740,8 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* only loop if the array is non-empty */
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
while (idx <= end_idx) {
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
while (1) {
/* read any JSON term */
val = scan_once_unicode(s, pystr, idx, &next_idx);
@ -756,11 +758,9 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
/* bail if the array is closed or we didn't get the , delimiter */
if (idx > end_idx) break;
if (PyUnicode_READ(kind, str, idx) == ']') {
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
break;
}
else if (PyUnicode_READ(kind, str, idx) != ',') {
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
raise_errmsg("Expecting ',' delimiter", pystr, idx);
goto bail;
}
@ -773,7 +773,7 @@ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
raise_errmsg("Expecting object", pystr, end_idx);
raise_errmsg("Expecting value", pystr, end_idx);
goto bail;
}
*next_idx_ptr = idx + 1;
@ -841,7 +841,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
if (PyUnicode_READ(kind, str, idx) == '-') {
idx++;
if (idx > end_idx) {
PyErr_SetNone(PyExc_StopIteration);
raise_stop_iteration(start);
return NULL;
}
}
@ -857,7 +857,7 @@ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_
}
/* no integer digits, error */
else {
PyErr_SetNone(PyExc_StopIteration);
raise_stop_iteration(start);
return NULL;
}
@ -950,7 +950,7 @@ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_
length = PyUnicode_GET_LENGTH(pystr);
if (idx >= length) {
PyErr_SetNone(PyExc_StopIteration);
raise_stop_iteration(idx);
return NULL;
}