(Merge 3.2) Issue #16416: On Mac OS X, operating system data are now always

encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
(which may be ASCII if no locale environment variable is set), to avoid
inconsistencies with os.fsencode() and os.fsdecode() functions which are
already using UTF-8/surrogateescape.
This commit is contained in:
Victor Stinner 2012-12-03 12:48:53 +01:00
commit 2660e427d1
4 changed files with 65 additions and 18 deletions

View file

@ -12,6 +12,12 @@ What's New in Python 3.3.1?
Core and Builtins
-----------------
- Issue #16416: On Mac OS X, operating system data are now always
encoded/decoded to/from UTF-8/surrogateescape, instead of the locale encoding
(which may be ASCII if no locale environment variable is set), to avoid
inconsistencies with os.fsencode() and os.fsdecode() functions which are
already using UTF-8/surrogateescape.
- Issue #16588: Silence unused-but-set warnings in Python/thread_pthread
- Issue #16546: Fix: ast.YieldFrom argument is now mandatory.

View file

@ -15,10 +15,6 @@ wmain(int argc, wchar_t **argv)
}
#else
#ifdef __APPLE__
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
#endif
int
main(int argc, char **argv)
{
@ -45,11 +41,7 @@ main(int argc, char **argv)
oldloc = strdup(setlocale(LC_ALL, NULL));
setlocale(LC_ALL, "");
for (i = 0; i < argc; i++) {
#ifdef __APPLE__
argv_copy[i] = _Py_DecodeUTF8_surrogateescape(argv[i], strlen(argv[i]));
#else
argv_copy[i] = _Py_char2wchar(argv[i], NULL);
#endif
if (!argv_copy[i]) {
free(oldloc);
fprintf(stderr, "Fatal Python error: "

View file

@ -4809,7 +4809,10 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
#ifdef __APPLE__
/* Simplified UTF-8 decoder using surrogateescape error handler,
used to decode the command line arguments on Mac OS X. */
used to decode the command line arguments on Mac OS X.
Return a pointer to a newly allocated wide character string (use
PyMem_Free() to free the memory), or NULL on memory allocation error. */
wchar_t*
_Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
@ -4820,10 +4823,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size)
/* Note: size will always be longer than the resulting Unicode
character count */
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1)) {
PyErr_NoMemory();
if (PY_SSIZE_T_MAX / sizeof(wchar_t) < (size + 1))
return NULL;
}
unicode = PyMem_Malloc((size + 1) * sizeof(wchar_t));
if (!unicode)
return NULL;

View file

@ -8,6 +8,10 @@
#include <langinfo.h>
#endif
#ifdef __APPLE__
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size);
#endif
PyObject *
_Py_device_encoding(int fd)
{
@ -60,6 +64,17 @@ _Py_device_encoding(int fd)
wchar_t*
_Py_char2wchar(const char* arg, size_t *size)
{
#ifdef __APPLE__
wchar_t *wstr;
wstr = _Py_DecodeUTF8_surrogateescape(arg, strlen(arg));
if (size != NULL) {
if (wstr != NULL)
*size = wcslen(wstr);
else
*size = (size_t)-1;
}
return wstr;
#else
wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
/* Some platforms have a broken implementation of
@ -145,7 +160,7 @@ _Py_char2wchar(const char* arg, size_t *size)
argsize -= converted;
out++;
}
#else
#else /* HAVE_MBRTOWC */
/* Cannot use C locale for escaping; manually escape as if charset
is ASCII (i.e. escape all bytes > 128). This will still roundtrip
correctly in the locale's charset, which must be an ASCII superset. */
@ -160,7 +175,7 @@ _Py_char2wchar(const char* arg, size_t *size)
else
*out++ = 0xdc00 + *in++;
*out = 0;
#endif
#endif /* HAVE_MBRTOWC */
if (size != NULL)
*size = out - res;
return res;
@ -168,6 +183,7 @@ _Py_char2wchar(const char* arg, size_t *size)
if (size != NULL)
*size = (size_t)-1;
return NULL;
#endif /* __APPLE__ */
}
/* Encode a (wide) character string to the locale encoding with the
@ -184,14 +200,42 @@ _Py_char2wchar(const char* arg, size_t *size)
char*
_Py_wchar2char(const wchar_t *text, size_t *error_pos)
{
#ifdef __APPLE__
Py_ssize_t len;
PyObject *unicode, *bytes = NULL;
char *cpath;
unicode = PyUnicode_FromWideChar(text, wcslen(text));
if (unicode == NULL)
return NULL;
bytes = _PyUnicode_AsUTF8String(unicode, "surrogateescape");
Py_DECREF(unicode);
if (bytes == NULL) {
PyErr_Clear();
if (error_pos != NULL)
*error_pos = (size_t)-1;
return NULL;
}
len = PyBytes_GET_SIZE(bytes);
cpath = PyMem_Malloc(len+1);
if (cpath == NULL) {
PyErr_Clear();
Py_DECREF(bytes);
if (error_pos != NULL)
*error_pos = (size_t)-1;
return NULL;
}
memcpy(cpath, PyBytes_AsString(bytes), len + 1);
Py_DECREF(bytes);
return cpath;
#else /* __APPLE__ */
const size_t len = wcslen(text);
char *result = NULL, *bytes = NULL;
size_t i, size, converted;
wchar_t c, buf[2];
if (error_pos != NULL)
*error_pos = (size_t)-1;
/* The function works in two steps:
1. compute the length of the output buffer in bytes (size)
2. output the bytes */
@ -238,11 +282,15 @@ _Py_wchar2char(const wchar_t *text, size_t *error_pos)
size += 1; /* nul byte at the end */
result = PyMem_Malloc(size);
if (result == NULL)
if (result == NULL) {
if (error_pos != NULL)
*error_pos = (size_t)-1;
return NULL;
}
bytes = result;
}
return result;
#endif /* __APPLE__ */
}
/* In principle, this should use HAVE__WSTAT, and _wstat