From 4c7c8c30235e42c47500b91549c2b6154b61f883 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 16 Oct 2010 13:14:10 +0000 Subject: [PATCH] Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect filenames encoded to the filesystem encoding with surrogateescape error handler (to support undecodable bytes), instead of UTF-8 in strict mode. --- Misc/NEWS | 6 +++++- Parser/tokenizer.h | 2 +- Python/ast.c | 7 ++++++- Python/bltinmodule.c | 33 ++++++++++++++++++++++----------- Python/compile.c | 2 +- Python/pythonrun.c | 12 +++++++++--- Python/traceback.c | 35 +++++++++++++++++++++++++---------- 7 files changed, 69 insertions(+), 28 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 01a24427cc8..b32f48524ed 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,10 @@ What's New in Python 3.2 Beta 1? Core and Builtins ----------------- +- Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect + filenames encoded to the filesystem encoding with surrogateescape error + handler (to support undecodable bytes), instead of UTF-8 in strict mode. + - Issue #9997: Don't let the name "top" have special significance in scope resolution. @@ -39,7 +43,7 @@ Library XML namespace attribute is encountered. - Issue #2830: Add the ``html.escape()`` function, which quotes all problematic - characters by default. Deprecate ``cgi.escape()``. + characters by default. Deprecate ``cgi.escape()``. - Issue #9409: Fix the regex to match all kind of filenames, for interactive debugging in doctests. 
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index d9866f6c185..424567d148a 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -40,7 +40,7 @@ struct tok_state { int level; /* () [] {} Parentheses nesting level */ /* Used to allow free continuations inside them */ /* Stuff for checking on different tab sizes */ - const char *filename; /* For error messages */ + const char *filename; /* encoded to the filesystem encoding */ int altwarning; /* Issue warning if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */ int alttabsize; /* Alternate tab spacing */ diff --git a/Python/ast.c b/Python/ast.c index 38643f6a3bd..b9beef88ef9 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -102,6 +102,7 @@ static void ast_error_finish(const char *filename) { PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp; + PyObject *filename_obj; long lineno; assert(PyErr_Occurred()); @@ -130,7 +131,11 @@ ast_error_finish(const char *filename) Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(zlOO)", filename, lineno, offset, loc); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj != NULL) + tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc); + else + tmp = NULL; Py_DECREF(loc); if (!tmp) { Py_DECREF(errstr); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 2e8d6e21b3f..ece2a3728e8 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -524,6 +524,7 @@ static PyObject * builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { char *str; + PyObject *filename_obj; char *filename; char *startstr; int mode = -1; @@ -535,12 +536,16 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"source", "filename", "mode", "flags", "dont_inherit", NULL}; int start[] = {Py_file_input, Py_eval_input, Py_single_input}; + PyObject *result; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile", - kwlist, &cmd, &filename, 
&startstr, - &supplied_flags, &dont_inherit)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", kwlist, + &cmd, + PyUnicode_FSConverter, &filename_obj, + &startstr, &supplied_flags, + &dont_inherit)) return NULL; + filename = PyBytes_AS_STRING(filename_obj); cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; if (supplied_flags & @@ -548,7 +553,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { PyErr_SetString(PyExc_ValueError, "compile(): unrecognised flags"); - return NULL; + goto error; } /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ @@ -565,14 +570,13 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) else { PyErr_SetString(PyExc_ValueError, "compile() arg 3 must be 'exec', 'eval' or 'single'"); - return NULL; + goto error; } is_ast = PyAST_Check(cmd); if (is_ast == -1) - return NULL; + goto error; if (is_ast) { - PyObject *result; if (supplied_flags & PyCF_ONLY_AST) { Py_INCREF(cmd); result = cmd; @@ -585,20 +589,27 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) mod = PyAST_obj2mod(cmd, arena, mode); if (mod == NULL) { PyArena_Free(arena); - return NULL; + goto error; } result = (PyObject*)PyAST_Compile(mod, filename, &cf, arena); PyArena_Free(arena); } - return result; + goto finally; } str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf); if (str == NULL) - return NULL; + goto error; - return Py_CompileStringFlags(str, filename, start[mode], &cf); + result = Py_CompileStringFlags(str, filename, start[mode], &cf); + goto finally; + +error: + result = NULL; +finally: + Py_DECREF(filename_obj); + return result; } PyDoc_STRVAR(compile_doc, diff --git a/Python/compile.c b/Python/compile.c index d29e48c47a2..1ff085909cc 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -3942,7 +3942,7 @@ makecode(struct compiler *c, struct assembler *a) freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars)); if (!freevars) goto error; - filename = 
PyUnicode_FromString(c->c_filename); + filename = PyUnicode_DecodeFSDefault(c->c_filename); if (!filename) goto error; diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 73fef75602a..8c535fd8a61 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1213,7 +1213,7 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, d = PyModule_GetDict(m); if (PyDict_GetItemString(d, "__file__") == NULL) { PyObject *f; - f = PyUnicode_FromString(filename); + f = PyUnicode_DecodeFSDefault(filename); if (f == NULL) return -1; if (PyDict_SetItemString(d, "__file__", f) < 0) { @@ -1968,7 +1968,9 @@ err_input(perrdetail *err) { PyObject *v, *w, *errtype, *errtext; PyObject *msg_obj = NULL; + PyObject *filename; char *msg = NULL; + errtype = PyExc_SyntaxError; switch (err->error) { case E_ERROR: @@ -2052,8 +2054,12 @@ err_input(perrdetail *err) errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), "replace"); } - v = Py_BuildValue("(ziiN)", err->filename, - err->lineno, err->offset, errtext); + filename = PyUnicode_DecodeFSDefault(err->filename); + if (filename != NULL) + v = Py_BuildValue("(NiiN)", filename, + err->lineno, err->offset, errtext); + else + v = NULL; if (v != NULL) { if (msg_obj) w = Py_BuildValue("(OO)", msg_obj, v); diff --git a/Python/traceback.c b/Python/traceback.c index 558755d2098..ab10cfd1618 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -142,16 +142,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * Py_ssize_t npath; size_t taillen; PyObject *syspath; - const char* path; + PyObject *path; const char* tail; + PyObject *filebytes; const char* filepath; Py_ssize_t len; + PyObject* result; - filepath = _PyUnicode_AsString(filename); - if (filepath == NULL) { + filebytes = PyUnicode_EncodeFSDefault(filename); + if (filebytes == NULL) { PyErr_Clear(); return NULL; } + filepath = PyBytes_AS_STRING(filebytes); /* Search tail of filename in sys.path before giving up */ tail = 
strrchr(filepath, SEP); @@ -163,7 +166,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * syspath = PySys_GetObject("path"); if (syspath == NULL || !PyList_Check(syspath)) - return NULL; + goto error; npath = PyList_Size(syspath); for (i = 0; i < npath; i++) { @@ -174,14 +177,18 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * } if (!PyUnicode_Check(v)) continue; - path = _PyUnicode_AsStringAndSize(v, &len); + path = PyUnicode_EncodeFSDefault(v); if (path == NULL) { PyErr_Clear(); continue; } - if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) + len = PyBytes_GET_SIZE(path); + if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) { + Py_DECREF(path); continue; /* Too long */ - strcpy(namebuf, path); + } + strcpy(namebuf, PyBytes_AS_STRING(path)); + Py_DECREF(path); if (strlen(namebuf) != len) continue; /* v contains '\0' */ if (len > 0 && namebuf[len-1] != SEP) @@ -189,11 +196,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject * strcpy(namebuf+len, tail); binary = PyObject_CallMethod(io, "open", "ss", namebuf, "rb"); - if (binary != NULL) - return binary; + if (binary != NULL) { + result = binary; + goto finally; + } PyErr_Clear(); } - return NULL; + goto error; + +error: + result = NULL; +finally: + Py_DECREF(filebytes); + return result; } int