mirror of
https://github.com/python/cpython
synced 2024-09-18 20:01:39 +00:00
SF #941229: Decode source code with sys.stdin.encoding in interactive
mode, as in non-interactive modes. This allows non-latin-1 users to write unicode strings directly and frees Japanese users from weird manual escaping <wink> in shift_jis environments. (Reviewed by Martin v. Loewis)
This commit is contained in:
parent
5910d81c97
commit
7df44b384a
|
@ -70,6 +70,10 @@ Core and builtins
|
||||||
- unicode.iswide() and unicode.width() are dropped and the East Asian
|
- unicode.iswide() and unicode.width() are dropped and the East Asian
|
||||||
Width support is moved to unicodedata extension module.
|
Width support is moved to unicodedata extension module.
|
||||||
|
|
||||||
|
- Patch #941229: The source code encoding in interactive mode
|
||||||
|
now follows sys.stdin.encoding instead of always being ISO-8859-1. This
|
||||||
|
allows non-latin-1 users to write unicode strings directly.
|
||||||
|
|
||||||
Extension modules
|
Extension modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
|
|
@ -651,6 +651,63 @@ PyTokenizer_Free(struct tok_state *tok)
|
||||||
PyMem_DEL(tok);
|
PyMem_DEL(tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(PGEN) && defined(Py_USING_UNICODE)
|
||||||
|
static int
|
||||||
|
tok_stdin_decode(struct tok_state *tok, char **inp)
|
||||||
|
{
|
||||||
|
PyObject *enc, *sysstdin, *decoded, *utf8;
|
||||||
|
const char *encoding;
|
||||||
|
char *converted;
|
||||||
|
|
||||||
|
if (PySys_GetFile((char *)"stdin", NULL) != stdin)
|
||||||
|
return 0;
|
||||||
|
sysstdin = PySys_GetObject("stdin");
|
||||||
|
if (sysstdin == NULL || !PyFile_Check(sysstdin))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
enc = ((PyFileObject *)sysstdin)->f_encoding;
|
||||||
|
if (enc == NULL || !PyString_Check(enc))
|
||||||
|
return 0;
|
||||||
|
Py_INCREF(enc);
|
||||||
|
|
||||||
|
encoding = PyString_AsString(enc);
|
||||||
|
decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
|
||||||
|
if (decoded == NULL)
|
||||||
|
goto error_clear;
|
||||||
|
|
||||||
|
utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
|
||||||
|
Py_DECREF(decoded);
|
||||||
|
if (utf8 == NULL)
|
||||||
|
goto error_clear;
|
||||||
|
|
||||||
|
converted = new_string(PyString_AsString(utf8), PyString_Size(utf8));
|
||||||
|
Py_DECREF(utf8);
|
||||||
|
if (converted == NULL)
|
||||||
|
goto error_nomem;
|
||||||
|
|
||||||
|
PyMem_FREE(*inp);
|
||||||
|
*inp = converted;
|
||||||
|
if (tok->encoding != NULL)
|
||||||
|
PyMem_DEL(tok->encoding);
|
||||||
|
tok->encoding = new_string(encoding, strlen(encoding));
|
||||||
|
if (tok->encoding == NULL)
|
||||||
|
goto error_nomem;
|
||||||
|
|
||||||
|
Py_DECREF(enc);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
error_nomem:
|
||||||
|
Py_DECREF(enc);
|
||||||
|
tok->done = E_NOMEM;
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
error_clear:
|
||||||
|
/* Fallback to iso-8859-1: for backward compatibility */
|
||||||
|
Py_DECREF(enc);
|
||||||
|
PyErr_Clear();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Get next char, updating state; error code goes into tok->done */
|
/* Get next char, updating state; error code goes into tok->done */
|
||||||
|
|
||||||
|
@ -690,6 +747,10 @@ tok_nextc(register struct tok_state *tok)
|
||||||
PyMem_FREE(new);
|
PyMem_FREE(new);
|
||||||
tok->done = E_EOF;
|
tok->done = E_EOF;
|
||||||
}
|
}
|
||||||
|
#if !defined(PGEN) && defined(Py_USING_UNICODE)
|
||||||
|
else if (tok_stdin_decode(tok, &new) != 0)
|
||||||
|
PyMem_FREE(new);
|
||||||
|
#endif
|
||||||
else if (tok->start != NULL) {
|
else if (tok->start != NULL) {
|
||||||
size_t start = tok->start - tok->buf;
|
size_t start = tok->start - tok->buf;
|
||||||
size_t oldlen = tok->cur - tok->buf;
|
size_t oldlen = tok->cur - tok->buf;
|
||||||
|
|
Loading…
Reference in a new issue