SF #941229: Decode source code with sys.stdin.encoding in interactive

modes like non-interactive modes.  This allows for non-latin-1 users
to write unicode strings directly and sets Japanese users free from
weird manual escaping <wink> in shift_jis environments.
(Reviewed by Martin v. Loewis)
This commit is contained in:
Hye-Shik Chang 2004-08-04 17:36:41 +00:00
parent 5910d81c97
commit 7df44b384a
2 changed files with 65 additions and 0 deletions

View file

@ -70,6 +70,10 @@ Core and builtins
- unicode.iswide() and unicode.width() are dropped and the East Asian
Width support is moved to the unicodedata extension module.
- Patch #941229: The source code encoding in interactive mode
now follows sys.stdin.encoding instead of always being ISO-8859-1.
This allows non-latin-1 users to write unicode strings directly.
Extension modules
-----------------

View file

@ -651,6 +651,63 @@ PyTokenizer_Free(struct tok_state *tok)
PyMem_DEL(tok);
}
#if !defined(PGEN) && defined(Py_USING_UNICODE)
/* Recode one line read from the interactive prompt into UTF-8.

   The line in *inp is decoded with the encoding recorded on sys.stdin
   (its f_encoding attribute) and then re-encoded as UTF-8.  On success
   the old buffer is released with PyMem_FREE, *inp is pointed at the
   recoded copy, and tok->encoding is replaced with a copy of the
   encoding name.

   Nothing is changed, and 0 is returned, when:
     - sys.stdin does not wrap the C-level stdin stream,
     - sys.stdin is missing or is not a file object,
     - its f_encoding is NULL or not a string object, or
     - decoding or re-encoding fails (the pending exception is cleared).

   Returns 0 in all of the cases above and on success.  Returns -1 with
   tok->done set to E_NOMEM when new_string() returns NULL; *inp may
   already have been replaced at that point, and the caller remains
   responsible for freeing whatever *inp then refers to. */
static int
tok_stdin_decode(struct tok_state *tok, char **inp)
{
    PyObject *stdin_obj, *enc_obj, *unicode_line, *utf8_line;
    const char *enc_name;
    char *recoded;

    if (PySys_GetFile((char *)"stdin", NULL) != stdin)
        return 0;

    stdin_obj = PySys_GetObject("stdin");
    if (stdin_obj == NULL || !PyFile_Check(stdin_obj))
        return 0;

    enc_obj = ((PyFileObject *)stdin_obj)->f_encoding;
    if (enc_obj == NULL || !PyString_Check(enc_obj))
        return 0;

    /* enc_name points into enc_obj, so hold a reference while it is used. */
    Py_INCREF(enc_obj);
    enc_name = PyString_AsString(enc_obj);

    /* stdin encoding -> unicode -> UTF-8 byte string */
    utf8_line = NULL;
    unicode_line = PyUnicode_Decode(*inp, strlen(*inp), enc_name, NULL);
    if (unicode_line != NULL) {
        utf8_line = PyUnicode_AsEncodedString(unicode_line, "utf-8", NULL);
        Py_DECREF(unicode_line);
    }
    if (utf8_line == NULL) {
        /* Fallback to iso-8859-1: for backward compatibility */
        Py_DECREF(enc_obj);
        PyErr_Clear();
        return 0;
    }

    recoded = new_string(PyString_AsString(utf8_line),
                         PyString_Size(utf8_line));
    Py_DECREF(utf8_line);
    if (recoded == NULL) {
        Py_DECREF(enc_obj);
        tok->done = E_NOMEM;
        return -1;
    }

    /* Hand the recoded line back to the caller in place of the original. */
    PyMem_FREE(*inp);
    *inp = recoded;

    /* Record the encoding name on the tokenizer state. */
    if (tok->encoding != NULL) {
        PyMem_DEL(tok->encoding);
    }
    tok->encoding = new_string(enc_name, strlen(enc_name));

    Py_DECREF(enc_obj);
    if (tok->encoding == NULL) {
        tok->done = E_NOMEM;
        return -1;
    }
    return 0;
}
#endif
/* Get next char, updating state; error code goes into tok->done */
@ -690,6 +747,10 @@ tok_nextc(register struct tok_state *tok)
PyMem_FREE(new);
tok->done = E_EOF;
}
#if !defined(PGEN) && defined(Py_USING_UNICODE)
else if (tok_stdin_decode(tok, &new) != 0)
PyMem_FREE(new);
#endif
else if (tok->start != NULL) {
size_t start = tok->start - tok->buf;
size_t oldlen = tok->cur - tok->buf;