bpo-22005: Fixed unpickling instances of datetime classes pickled by Python 2. (GH-11017)

encoding='latin1' should be used for successful decoding.
This commit is contained in:
Serhiy Storchaka 2018-12-07 13:42:10 +02:00 committed by GitHub
parent 4c49da0cb7
commit 8452ca15f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 333 additions and 95 deletions

View file

@ -243,6 +243,9 @@ process more convenient:
*errors* tell pickle how to decode 8-bit string instances pickled by Python
2; these default to 'ASCII' and 'strict', respectively. The *encoding* can
be 'bytes' to read these 8-bit string instances as bytes objects.
Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
:class:`~datetime.time` pickled by Python 2.
.. function:: loads(bytes_object, \*, fix_imports=True, encoding="ASCII", errors="strict")
@ -260,6 +263,9 @@ process more convenient:
*errors* tell pickle how to decode 8-bit string instances pickled by Python
2; these default to 'ASCII' and 'strict', respectively. The *encoding* can
be 'bytes' to read these 8-bit string instances as bytes objects.
Using ``encoding='latin1'`` is required for unpickling NumPy arrays and
instances of :class:`~datetime.datetime`, :class:`~datetime.date` and
:class:`~datetime.time` pickled by Python 2.
The :mod:`pickle` module defines three exceptions:

View file

@ -808,9 +808,19 @@ def __new__(cls, year, month=None, day=None):
year, month, day (required, base 1)
"""
if month is None and isinstance(year, bytes) and len(year) == 4 and \
1 <= year[2] <= 12:
if (month is None and
isinstance(year, (bytes, str)) and len(year) == 4 and
1 <= ord(year[2:3]) <= 12):
# Pickle support
if isinstance(year, str):
try:
year = year.encode('latin1')
except UnicodeEncodeError:
# More informative error message.
raise ValueError(
"Failed to encode latin1 string when unpickling "
"a date object. "
"pickle.load(data, encoding='latin1') is assumed.")
self = object.__new__(cls)
self.__setstate(year)
self._hashcode = -1
@ -1184,8 +1194,18 @@ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold
tzinfo (default to None)
fold (keyword only, default to zero)
"""
if isinstance(hour, bytes) and len(hour) == 6 and hour[0]&0x7F < 24:
if (isinstance(hour, (bytes, str)) and len(hour) == 6 and
ord(hour[0:1])&0x7F < 24):
# Pickle support
if isinstance(hour, str):
try:
hour = hour.encode('latin1')
except UnicodeEncodeError:
# More informative error message.
raise ValueError(
"Failed to encode latin1 string when unpickling "
"a time object. "
"pickle.load(data, encoding='latin1') is assumed.")
self = object.__new__(cls)
self.__setstate(hour, minute or None)
self._hashcode = -1
@ -1496,8 +1516,18 @@ class datetime(date):
def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
microsecond=0, tzinfo=None, *, fold=0):
if isinstance(year, bytes) and len(year) == 10 and 1 <= year[2]&0x7F <= 12:
if (isinstance(year, (bytes, str)) and len(year) == 10 and
1 <= ord(year[2:3])&0x7F <= 12):
# Pickle support
if isinstance(year, str):
try:
year = bytes(year, 'latin1')
except UnicodeEncodeError:
# More informative error message.
raise ValueError(
"Failed to encode latin1 string when unpickling "
"a datetime object. "
"pickle.load(data, encoding='latin1') is assumed.")
self = object.__new__(cls)
self.__setstate(year, month)
self._hashcode = -1

View file

@ -38,6 +38,7 @@
import _strptime
#
# The set of loads() callables that the test_compat_unpickle tests iterate
# over, so each Python 2 payload is fed to both pickle.loads and
# pickle._loads.
pickle_loads = {pickle.loads, pickle._loads}
# One 3-tuple per protocol number from 0 through pickle.HIGHEST_PROTOCOL.
# NOTE(review): presumably (pickler, unpickler, protocol) -- confirm against
# the tests that unpack these tuples.
pickle_choices = [(pickle, pickle, proto)
for proto in range(pickle.HIGHEST_PROTOCOL + 1)]
@ -1434,6 +1435,19 @@ def test_pickling(self):
self.assertEqual(orig, derived)
self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
def test_compat_unpickle(self):
    """Date payloads in Python 2 pickle format load with encoding='latin1'."""
    # The same date state in three encodings: an escaped S-string, a raw
    # U-string, and a raw U-string behind a \x80\x02 protocol header.
    payloads = (
        b"cdatetime\ndate\n(S'\\x07\\xdf\\x0b\\x1b'\ntR.",
        b'cdatetime\ndate\n(U\x04\x07\xdf\x0b\x1btR.',
        b'\x80\x02cdatetime\ndate\nU\x04\x07\xdf\x0b\x1b\x85R.',
    )
    expected = self.theclass(2015, 11, 27)
    for payload in payloads:
        for unpickle in pickle_loads:
            self.assertEqual(unpickle(payload, encoding='latin1'), expected)
def test_compare(self):
t1 = self.theclass(2, 3, 4)
t2 = self.theclass(2, 3, 4)
@ -2098,6 +2112,24 @@ def test_pickling_subclass_datetime(self):
derived = unpickler.loads(green)
self.assertEqual(orig, derived)
def test_compat_unpickle(self):
    """Naive datetime payloads in Python 2 pickle format load with latin1."""
    # The same 10-byte datetime state in three encodings: an escaped
    # S-string, a raw U-string, and a raw U-string behind a \x80\x02
    # protocol header.
    payloads = (
        b'cdatetime\ndatetime\n('
        b"S'\\x07\\xdf\\x0b\\x1b\\x14;\\x01\\x00\\x10\\x00'\ntR.",
        b'cdatetime\ndatetime\n('
        b'U\n\x07\xdf\x0b\x1b\x14;\x01\x00\x10\x00tR.',
        b'\x80\x02cdatetime\ndatetime\n'
        b'U\n\x07\xdf\x0b\x1b\x14;\x01\x00\x10\x00\x85R.',
    )
    # The trailing state bytes \x00\x10\x00 are the microsecond value 64**2.
    expected = self.theclass(2015, 11, 27, 20, 59, 1, 64**2)
    for payload in payloads:
        for unpickle in pickle_loads:
            self.assertEqual(unpickle(payload, encoding='latin1'), expected)
def test_more_compare(self):
# The test_compare() inherited from TestDate covers the error cases.
# We just want to test lexicographic ordering on the members datetime
@ -3069,6 +3101,19 @@ def test_pickling_subclass_time(self):
derived = unpickler.loads(green)
self.assertEqual(orig, derived)
def test_compat_unpickle(self):
    """Naive time payloads in Python 2 pickle format load with latin1."""
    # The same 6-byte time state in three encodings: an escaped S-string,
    # a raw U-string, and a raw U-string behind a \x80\x02 protocol header.
    payloads = (
        b"cdatetime\ntime\n(S'\\x14;\\x10\\x00\\x10\\x00'\ntR.",
        b'cdatetime\ntime\n(U\x06\x14;\x10\x00\x10\x00tR.',
        b'\x80\x02cdatetime\ntime\nU\x06\x14;\x10\x00\x10\x00\x85R.',
    )
    expected = self.theclass(20, 59, 16, 64**2)
    for payload in payloads:
        for unpickle in pickle_loads:
            self.assertEqual(unpickle(payload, encoding='latin1'), expected)
def test_bool(self):
# time is always True.
cls = self.theclass
@ -3441,6 +3486,40 @@ def test_pickling(self):
self.assertEqual(derived.tzname(), 'cookie')
self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
def test_compat_unpickle(self):
    """Aware time payloads in Python 2 pickle format load with latin1.

    Each payload carries the time state plus a pickled
    PicklableFixedOffset instance as the tzinfo argument.
    """
    payloads = (
        # Text form: S-strings and I-integers.
        b"cdatetime\ntime\n(S'\\x05\\x06\\x07\\x01\\xe2@'\n"
        b"ctest.datetimetester\nPicklableFixedOffset\n(tR"
        b"(dS'_FixedOffset__offset'\ncdatetime\ntimedelta\n"
        b"(I-1\nI68400\nI0\ntRs"
        b"S'_FixedOffset__dstoffset'\nNs"
        b"S'_FixedOffset__name'\nS'cookie'\nsbtR.",
        # Binary form: raw U-strings and J/K integers.
        b'cdatetime\ntime\n(U\x06\x05\x06\x07\x01\xe2@'
        b'ctest.datetimetester\nPicklableFixedOffset\n)R'
        b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
        b'(J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00tR'
        b'U\x17_FixedOffset__dstoffsetN'
        b'U\x12_FixedOffset__nameU\x06cookieubtR.',
        # Binary form behind a \x80\x02 protocol header.
        b'\x80\x02cdatetime\ntime\nU\x06\x05\x06\x07\x01\xe2@'
        b'ctest.datetimetester\nPicklableFixedOffset\n)R'
        b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
        b'J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00\x87R'
        b'U\x17_FixedOffset__dstoffsetN'
        b'U\x12_FixedOffset__nameU\x06cookieub\x86R.',
    )
    zone = PicklableFixedOffset(-300, 'cookie')
    expected = self.theclass(5, 6, 7, 123456, tzinfo=zone)
    expected_offset = timedelta(minutes=-300)
    for payload in payloads:
        for unpickle in pickle_loads:
            loaded = unpickle(payload, encoding='latin1')
            # repr(payload) in the failure message identifies the payload.
            self.assertEqual(loaded, expected, repr(payload))
            self.assertIsInstance(loaded.tzinfo, PicklableFixedOffset)
            self.assertEqual(loaded.utcoffset(), expected_offset)
            self.assertEqual(loaded.tzname(), 'cookie')
def test_more_bool(self):
# time is always True.
cls = self.theclass
@ -3789,6 +3868,43 @@ def test_pickling(self):
self.assertEqual(derived.tzname(), 'cookie')
self.assertEqual(orig.__reduce__(), orig.__reduce_ex__(2))
def test_compat_unpickle(self):
    """Aware datetime payloads in Python 2 pickle format load with latin1.

    Each payload carries the datetime state plus a pickled
    PicklableFixedOffset instance as the tzinfo argument.
    """
    payloads = (
        # Text form: S-strings and I-integers.
        b'cdatetime\ndatetime\n'
        b"(S'\\x07\\xdf\\x0b\\x1b\\x14;\\x01\\x01\\xe2@'\n"
        b'ctest.datetimetester\nPicklableFixedOffset\n(tR'
        b"(dS'_FixedOffset__offset'\ncdatetime\ntimedelta\n"
        b'(I-1\nI68400\nI0\ntRs'
        b"S'_FixedOffset__dstoffset'\nNs"
        b"S'_FixedOffset__name'\nS'cookie'\nsbtR.",
        # Binary form: raw U-strings and J/K integers.
        b'cdatetime\ndatetime\n'
        b'(U\n\x07\xdf\x0b\x1b\x14;\x01\x01\xe2@'
        b'ctest.datetimetester\nPicklableFixedOffset\n)R'
        b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
        b'(J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00tR'
        b'U\x17_FixedOffset__dstoffsetN'
        b'U\x12_FixedOffset__nameU\x06cookieubtR.',
        # Binary form behind a \x80\x02 protocol header.
        b'\x80\x02cdatetime\ndatetime\n'
        b'U\n\x07\xdf\x0b\x1b\x14;\x01\x01\xe2@'
        b'ctest.datetimetester\nPicklableFixedOffset\n)R'
        b'}(U\x14_FixedOffset__offsetcdatetime\ntimedelta\n'
        b'J\xff\xff\xff\xffJ0\x0b\x01\x00K\x00\x87R'
        b'U\x17_FixedOffset__dstoffsetN'
        b'U\x12_FixedOffset__nameU\x06cookieub\x86R.',
    )
    zone = PicklableFixedOffset(-300, 'cookie')
    expected = self.theclass(2015, 11, 27, 20, 59, 1, 123456, tzinfo=zone)
    expected_offset = timedelta(minutes=-300)
    for payload in payloads:
        for unpickle in pickle_loads:
            loaded = unpickle(payload, encoding='latin1')
            self.assertEqual(loaded, expected)
            self.assertIsInstance(loaded.tzinfo, PicklableFixedOffset)
            self.assertEqual(loaded.utcoffset(), expected_offset)
            self.assertEqual(loaded.tzname(), 'cookie')
def test_extreme_hashes(self):
# If an attempt is made to hash these via subtracting the offset
# then hashing a datetime object, OverflowError results. The

View file

@ -0,0 +1,3 @@
Implemented unpickling instances of :class:`~datetime.datetime`,
:class:`~datetime.date` and :class:`~datetime.time` pickled by Python 2.
``encoding='latin1'`` should be used for successful decoding.

View file

@ -2788,31 +2788,60 @@ static PyGetSetDef date_getset[] = {
static char *date_kws[] = {"year", "month", "day", NULL};
/* Create a date object of the given type straight from pickled state.
 *
 * `state` must be a bytes object; date_new() checks its type and length
 * before calling.  The first _PyDateTime_DATE_DATASIZE bytes are copied
 * verbatim into the new object's data field and hashcode is set to -1.
 *
 * Returns a new object, or NULL if tp_alloc() fails.
 */
static PyObject *
date_from_pickle(PyTypeObject *type, PyObject *state)
{
    PyDateTime_Date *result = (PyDateTime_Date *)type->tp_alloc(type, 0);

    if (result == NULL) {
        return NULL;
    }
    memcpy(result->data, PyBytes_AS_STRING(state),
           _PyDateTime_DATE_DATASIZE);
    result->hashcode = -1;
    return (PyObject *)result;
}
static PyObject *
date_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
PyObject *self = NULL;
PyObject *state;
int year;
int month;
int day;
/* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) == 1) {
state = PyTuple_GET_ITEM(args, 0);
if (PyBytes_Check(state) &&
PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
{
PyDateTime_Date *me;
me = (PyDateTime_Date *) (type->tp_alloc(type, 0));
if (me != NULL) {
char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_DATE_DATASIZE);
me->hashcode = -1;
if (PyTuple_GET_SIZE(args) >= 1) {
PyObject *state = PyTuple_GET_ITEM(args, 0);
if (PyBytes_Check(state)) {
if (PyBytes_GET_SIZE(state) == _PyDateTime_DATE_DATASIZE &&
MONTH_IS_SANE(PyBytes_AS_STRING(state)[2]))
{
return date_from_pickle(type, state);
}
}
else if (PyUnicode_Check(state)) {
if (PyUnicode_READY(state)) {
return NULL;
}
if (PyUnicode_GET_LENGTH(state) == _PyDateTime_DATE_DATASIZE &&
MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2)))
{
state = PyUnicode_AsLatin1String(state);
if (state == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
/* More informative error message. */
PyErr_SetString(PyExc_ValueError,
"Failed to encode latin1 string when unpickling "
"a date object. "
"pickle.load(data, encoding='latin1') is assumed.");
}
return NULL;
}
self = date_from_pickle(type, state);
Py_DECREF(state);
return self;
}
return (PyObject *)me;
}
}
@ -3901,11 +3930,43 @@ static PyGetSetDef time_getset[] = {
static char *time_kws[] = {"hour", "minute", "second", "microsecond",
"tzinfo", "fold", NULL};
/* Create a time object of the given type straight from pickled state.
 *
 * `state` must be a bytes object; time_new() checks its type and length
 * before calling.  `tzinfo` is Py_None for a naive time; any other value
 * must pass check_tzinfo_subclass(), otherwise TypeError is set and NULL
 * is returned.
 *
 * The top bit of the first state byte carries the fold flag: when set,
 * fold becomes 1 and 128 is subtracted from the stored byte.
 *
 * Returns a new object, or NULL on error.
 */
static PyObject *
time_from_pickle(PyTypeObject *type, PyObject *state, PyObject *tzinfo)
{
    PyDateTime_Time *result;
    const char *raw;
    char has_tz = (char)(tzinfo != Py_None);

    if (has_tz && check_tzinfo_subclass(tzinfo) < 0) {
        PyErr_SetString(PyExc_TypeError, "bad tzinfo state arg");
        return NULL;
    }

    result = (PyDateTime_Time *)type->tp_alloc(type, has_tz);
    if (result == NULL) {
        return NULL;
    }

    raw = PyBytes_AS_STRING(state);
    memcpy(result->data, raw, _PyDateTime_TIME_DATASIZE);
    result->hashcode = -1;
    result->hastzinfo = has_tz;
    if (has_tz) {
        /* The object keeps its own reference to tzinfo. */
        Py_INCREF(tzinfo);
        result->tzinfo = tzinfo;
    }

    if (raw[0] & (1 << 7)) {
        /* Fold flag present: clear it from the stored byte. */
        result->data[0] -= 128;
        result->fold = 1;
    }
    else {
        result->fold = 0;
    }
    return (PyObject *)result;
}
static PyObject *
time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
PyObject *self = NULL;
PyObject *state;
int hour = 0;
int minute = 0;
int second = 0;
@ -3914,47 +3975,42 @@ time_new(PyTypeObject *type, PyObject *args, PyObject *kw)
int fold = 0;
/* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) >= 1 &&
PyTuple_GET_SIZE(args) <= 2)
{
state = PyTuple_GET_ITEM(args, 0);
if (PyBytes_Check(state) &&
PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
(0x7F & ((unsigned char) (PyBytes_AS_STRING(state)[0]))) < 24)
{
PyDateTime_Time *me;
char aware;
if (PyTuple_GET_SIZE(args) == 2) {
tzinfo = PyTuple_GET_ITEM(args, 1);
if (check_tzinfo_subclass(tzinfo) < 0) {
PyErr_SetString(PyExc_TypeError, "bad "
"tzinfo state arg");
if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2) {
PyObject *state = PyTuple_GET_ITEM(args, 0);
if (PyTuple_GET_SIZE(args) == 2) {
tzinfo = PyTuple_GET_ITEM(args, 1);
}
if (PyBytes_Check(state)) {
if (PyBytes_GET_SIZE(state) == _PyDateTime_TIME_DATASIZE &&
(0x7F & ((unsigned char) (PyBytes_AS_STRING(state)[0]))) < 24)
{
return time_from_pickle(type, state, tzinfo);
}
}
else if (PyUnicode_Check(state)) {
if (PyUnicode_READY(state)) {
return NULL;
}
/* A str state comes from a Python 2 pickle loaded with
 * encoding='latin1'.  The hour, with the fold flag in its top bit, is
 * the FIRST byte of the time state, so character 0 is the one to
 * validate.  This matches the bytes branch, which tests byte 0, and
 * time_from_pickle(), which reads the fold flag from byte 0. */
if (PyUnicode_GET_LENGTH(state) == _PyDateTime_TIME_DATASIZE &&
    (0x7F & PyUnicode_READ_CHAR(state, 0)) < 24)
{
    /* `state` now refers to a new bytes object that this branch owns;
     * it is released after the time object has been built. */
    state = PyUnicode_AsLatin1String(state);
    if (state == NULL) {
        if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
            /* More informative error message. */
            PyErr_SetString(PyExc_ValueError,
                "Failed to encode latin1 string when unpickling "
                "a time object. "
                "pickle.load(data, encoding='latin1') is assumed.");
        }
        return NULL;
    }
    self = time_from_pickle(type, state, tzinfo);
    Py_DECREF(state);
    return self;
}
aware = (char)(tzinfo != Py_None);
me = (PyDateTime_Time *) (type->tp_alloc(type, aware));
if (me != NULL) {
char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_TIME_DATASIZE);
me->hashcode = -1;
me->hastzinfo = aware;
if (aware) {
Py_INCREF(tzinfo);
me->tzinfo = tzinfo;
}
if (pdata[0] & (1 << 7)) {
me->data[0] -= 128;
me->fold = 1;
}
else {
me->fold = 0;
}
}
return (PyObject *)me;
}
tzinfo = Py_None;
}
if (PyArg_ParseTupleAndKeywords(args, kw, "|iiiiO$i", time_kws,
@ -4540,11 +4596,43 @@ static char *datetime_kws[] = {
"microsecond", "tzinfo", "fold", NULL
};
/* Create a datetime object of the given type straight from pickled state.
 *
 * `state` must be a bytes object; datetime_new() checks its type and
 * length before calling.  `tzinfo` is Py_None for a naive datetime; any
 * other value must pass check_tzinfo_subclass(), otherwise TypeError is
 * set and NULL is returned.
 *
 * The top bit of the third state byte (index 2) carries the fold flag:
 * when set, fold becomes 1 and 128 is subtracted from the stored byte.
 *
 * Returns a new object, or NULL on error.
 */
static PyObject *
datetime_from_pickle(PyTypeObject *type, PyObject *state, PyObject *tzinfo)
{
    PyDateTime_DateTime *result;
    const char *raw;
    char has_tz = (char)(tzinfo != Py_None);

    if (has_tz && check_tzinfo_subclass(tzinfo) < 0) {
        PyErr_SetString(PyExc_TypeError, "bad tzinfo state arg");
        return NULL;
    }

    result = (PyDateTime_DateTime *)type->tp_alloc(type, has_tz);
    if (result == NULL) {
        return NULL;
    }

    raw = PyBytes_AS_STRING(state);
    memcpy(result->data, raw, _PyDateTime_DATETIME_DATASIZE);
    result->hashcode = -1;
    result->hastzinfo = has_tz;
    if (has_tz) {
        /* The object keeps its own reference to tzinfo. */
        Py_INCREF(tzinfo);
        result->tzinfo = tzinfo;
    }

    if (raw[2] & (1 << 7)) {
        /* Fold flag present: clear it from the stored byte. */
        result->data[2] -= 128;
        result->fold = 1;
    }
    else {
        result->fold = 0;
    }
    return (PyObject *)result;
}
static PyObject *
datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
PyObject *self = NULL;
PyObject *state;
int year;
int month;
int day;
@ -4556,47 +4644,42 @@ datetime_new(PyTypeObject *type, PyObject *args, PyObject *kw)
PyObject *tzinfo = Py_None;
/* Check for invocation from pickle with __getstate__ state */
if (PyTuple_GET_SIZE(args) >= 1 &&
PyTuple_GET_SIZE(args) <= 2)
{
state = PyTuple_GET_ITEM(args, 0);
if (PyBytes_Check(state) &&
PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
MONTH_IS_SANE(PyBytes_AS_STRING(state)[2] & 0x7F))
{
PyDateTime_DateTime *me;
char aware;
if (PyTuple_GET_SIZE(args) == 2) {
tzinfo = PyTuple_GET_ITEM(args, 1);
if (check_tzinfo_subclass(tzinfo) < 0) {
PyErr_SetString(PyExc_TypeError, "bad "
"tzinfo state arg");
if (PyTuple_GET_SIZE(args) >= 1 && PyTuple_GET_SIZE(args) <= 2) {
PyObject *state = PyTuple_GET_ITEM(args, 0);
if (PyTuple_GET_SIZE(args) == 2) {
tzinfo = PyTuple_GET_ITEM(args, 1);
}
if (PyBytes_Check(state)) {
if (PyBytes_GET_SIZE(state) == _PyDateTime_DATETIME_DATASIZE &&
MONTH_IS_SANE(PyBytes_AS_STRING(state)[2] & 0x7F))
{
return datetime_from_pickle(type, state, tzinfo);
}
}
else if (PyUnicode_Check(state)) {
if (PyUnicode_READY(state)) {
return NULL;
}
if (PyUnicode_GET_LENGTH(state) == _PyDateTime_DATETIME_DATASIZE &&
MONTH_IS_SANE(PyUnicode_READ_CHAR(state, 2) & 0x7F))
{
state = PyUnicode_AsLatin1String(state);
if (state == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
/* More informative error message. */
PyErr_SetString(PyExc_ValueError,
"Failed to encode latin1 string when unpickling "
"a datetime object. "
"pickle.load(data, encoding='latin1') is assumed.");
}
return NULL;
}
self = datetime_from_pickle(type, state, tzinfo);
Py_DECREF(state);
return self;
}
aware = (char)(tzinfo != Py_None);
me = (PyDateTime_DateTime *) (type->tp_alloc(type , aware));
if (me != NULL) {
char *pdata = PyBytes_AS_STRING(state);
memcpy(me->data, pdata, _PyDateTime_DATETIME_DATASIZE);
me->hashcode = -1;
me->hastzinfo = aware;
if (aware) {
Py_INCREF(tzinfo);
me->tzinfo = tzinfo;
}
if (pdata[2] & (1 << 7)) {
me->data[2] -= 128;
me->fold = 1;
}
else {
me->fold = 0;
}
}
return (PyObject *)me;
}
tzinfo = Py_None;
}
if (PyArg_ParseTupleAndKeywords(args, kw, "iii|iiiiO$i", datetime_kws,