Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect

filenames encoded to the filesystem encoding with surrogateescape error handler
(to support undecodable bytes), instead of UTF-8 in strict mode.
This commit is contained in:
Victor Stinner 2010-10-16 13:14:10 +00:00
parent 5a7913eb3b
commit 4c7c8c3023
7 changed files with 69 additions and 28 deletions

View file

@ -10,6 +10,10 @@ What's New in Python 3.2 Beta 1?
Core and Builtins
-----------------
- Issue #9713, #10114: Parser functions (e.g. PyParser_ASTFromFile) expect
filenames encoded to the filesystem encoding with surrogateescape error
handler (to support undecodable bytes), instead of UTF-8 in strict mode.
- Issue #9997: Don't let the name "top" have special significance in scope
resolution.
@ -39,7 +43,7 @@ Library
XML namespace attribute is encountered.
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
characters by default. Deprecate ``cgi.escape()``.
characters by default. Deprecate ``cgi.escape()``.
- Issue #9409: Fix the regex to match all kinds of filenames, for interactive
debugging in doctests.

View file

@ -40,7 +40,7 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
const char *filename; /* For error messages */
const char *filename; /* encoded to the filesystem encoding */
int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */

View file

@ -102,6 +102,7 @@ static void
ast_error_finish(const char *filename)
{
PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp;
PyObject *filename_obj;
long lineno;
assert(PyErr_Occurred());
@ -130,7 +131,11 @@ ast_error_finish(const char *filename)
Py_INCREF(Py_None);
loc = Py_None;
}
tmp = Py_BuildValue("(zlOO)", filename, lineno, offset, loc);
filename_obj = PyUnicode_DecodeFSDefault(filename);
if (filename_obj != NULL)
tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc);
else
tmp = NULL;
Py_DECREF(loc);
if (!tmp) {
Py_DECREF(errstr);

View file

@ -524,6 +524,7 @@ static PyObject *
builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
char *str;
PyObject *filename_obj;
char *filename;
char *startstr;
int mode = -1;
@ -535,12 +536,16 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
static char *kwlist[] = {"source", "filename", "mode", "flags",
"dont_inherit", NULL};
int start[] = {Py_file_input, Py_eval_input, Py_single_input};
PyObject *result;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile",
kwlist, &cmd, &filename, &startstr,
&supplied_flags, &dont_inherit))
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", kwlist,
&cmd,
PyUnicode_FSConverter, &filename_obj,
&startstr, &supplied_flags,
&dont_inherit))
return NULL;
filename = PyBytes_AS_STRING(filename_obj);
cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8;
if (supplied_flags &
@ -548,7 +553,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
{
PyErr_SetString(PyExc_ValueError,
"compile(): unrecognised flags");
return NULL;
goto error;
}
/* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */
@ -565,14 +570,13 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
else {
PyErr_SetString(PyExc_ValueError,
"compile() arg 3 must be 'exec', 'eval' or 'single'");
return NULL;
goto error;
}
is_ast = PyAST_Check(cmd);
if (is_ast == -1)
return NULL;
goto error;
if (is_ast) {
PyObject *result;
if (supplied_flags & PyCF_ONLY_AST) {
Py_INCREF(cmd);
result = cmd;
@ -585,20 +589,27 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
mod = PyAST_obj2mod(cmd, arena, mode);
if (mod == NULL) {
PyArena_Free(arena);
return NULL;
goto error;
}
result = (PyObject*)PyAST_Compile(mod, filename,
&cf, arena);
PyArena_Free(arena);
}
return result;
goto finally;
}
str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
if (str == NULL)
return NULL;
goto error;
return Py_CompileStringFlags(str, filename, start[mode], &cf);
result = Py_CompileStringFlags(str, filename, start[mode], &cf);
goto finally;
error:
result = NULL;
finally:
Py_DECREF(filename_obj);
return result;
}
PyDoc_STRVAR(compile_doc,

View file

@ -3942,7 +3942,7 @@ makecode(struct compiler *c, struct assembler *a)
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
if (!freevars)
goto error;
filename = PyUnicode_FromString(c->c_filename);
filename = PyUnicode_DecodeFSDefault(c->c_filename);
if (!filename)
goto error;

View file

@ -1213,7 +1213,7 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
d = PyModule_GetDict(m);
if (PyDict_GetItemString(d, "__file__") == NULL) {
PyObject *f;
f = PyUnicode_FromString(filename);
f = PyUnicode_DecodeFSDefault(filename);
if (f == NULL)
return -1;
if (PyDict_SetItemString(d, "__file__", f) < 0) {
@ -1968,7 +1968,9 @@ err_input(perrdetail *err)
{
PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL;
PyObject *filename;
char *msg = NULL;
errtype = PyExc_SyntaxError;
switch (err->error) {
case E_ERROR:
@ -2052,8 +2054,12 @@ err_input(perrdetail *err)
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace");
}
v = Py_BuildValue("(ziiN)", err->filename,
err->lineno, err->offset, errtext);
filename = PyUnicode_DecodeFSDefault(err->filename);
if (filename != NULL)
v = Py_BuildValue("(NiiN)", filename,
err->lineno, err->offset, errtext);
else
v = NULL;
if (v != NULL) {
if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v);

View file

@ -142,16 +142,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
Py_ssize_t npath;
size_t taillen;
PyObject *syspath;
const char* path;
PyObject *path;
const char* tail;
PyObject *filebytes;
const char* filepath;
Py_ssize_t len;
PyObject* result;
filepath = _PyUnicode_AsString(filename);
if (filepath == NULL) {
filebytes = PyUnicode_EncodeFSDefault(filename);
if (filebytes == NULL) {
PyErr_Clear();
return NULL;
}
filepath = PyBytes_AS_STRING(filebytes);
/* Search tail of filename in sys.path before giving up */
tail = strrchr(filepath, SEP);
@ -163,7 +166,7 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
syspath = PySys_GetObject("path");
if (syspath == NULL || !PyList_Check(syspath))
return NULL;
goto error;
npath = PyList_Size(syspath);
for (i = 0; i < npath; i++) {
@ -174,14 +177,18 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
}
if (!PyUnicode_Check(v))
continue;
path = _PyUnicode_AsStringAndSize(v, &len);
path = PyUnicode_EncodeFSDefault(v);
if (path == NULL) {
PyErr_Clear();
continue;
}
if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1)
len = PyBytes_GET_SIZE(path);
if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) {
Py_DECREF(path);
continue; /* Too long */
strcpy(namebuf, path);
}
strcpy(namebuf, PyBytes_AS_STRING(path));
Py_DECREF(path);
if (strlen(namebuf) != len)
continue; /* v contains '\0' */
if (len > 0 && namebuf[len-1] != SEP)
@ -189,11 +196,19 @@ _Py_FindSourceFile(PyObject *filename, char* namebuf, size_t namelen, PyObject *
strcpy(namebuf+len, tail);
binary = PyObject_CallMethod(io, "open", "ss", namebuf, "rb");
if (binary != NULL)
return binary;
if (binary != NULL) {
result = binary;
goto finally;
}
PyErr_Clear();
}
return NULL;
goto error;
error:
result = NULL;
finally:
Py_DECREF(filebytes);
return result;
}
int